1996-08-28 03:59:28 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
2019-01-29 22:49:25 +01:00
|
|
|
* pathnodes.h
|
|
|
|
* Definitions for planner's internal data structures, especially Paths.
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
* We don't support copying RelOptInfo, IndexOptInfo, or Path nodes.
|
|
|
|
* There are some subsidiary structs that are useful to copy, though.
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
2023-01-02 21:00:37 +01:00
|
|
|
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
2019-01-29 22:49:25 +01:00
|
|
|
* src/include/nodes/pathnodes.h
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
2019-01-29 22:49:25 +01:00
|
|
|
#ifndef PATHNODES_H
|
|
|
|
#define PATHNODES_H
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2000-02-15 21:49:31 +01:00
|
|
|
#include "access/sdir.h"
|
2014-11-07 23:26:02 +01:00
|
|
|
#include "lib/stringinfo.h"
|
2007-02-19 08:03:34 +01:00
|
|
|
#include "nodes/params.h"
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "nodes/parsenodes.h"
|
2004-11-26 22:08:35 +01:00
|
|
|
#include "storage/block.h"
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2003-02-08 21:20:55 +01:00
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
/*
|
1999-02-18 01:49:48 +01:00
|
|
|
* Relids
|
2003-02-08 21:20:55 +01:00
|
|
|
* Set of relation identifiers (indexes into the rangetable).
|
1996-08-28 03:59:28 +02:00
|
|
|
*/
|
2003-02-08 21:20:55 +01:00
|
|
|
typedef Bitmapset *Relids;
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2000-02-15 21:49:31 +01:00
|
|
|
/*
|
|
|
|
* When looking for a "cheapest path", this enum specifies whether we want
|
|
|
|
* cheapest startup cost or cheapest total cost.
|
|
|
|
*/
|
|
|
|
typedef enum CostSelector
|
|
|
|
{
|
|
|
|
STARTUP_COST, TOTAL_COST
|
|
|
|
} CostSelector;
|
|
|
|
|
2003-01-12 23:35:29 +01:00
|
|
|
/*
|
|
|
|
* The cost estimate produced by cost_qual_eval() includes both a one-time
|
|
|
|
* (startup) cost, and a per-tuple cost.
|
|
|
|
*/
|
|
|
|
typedef struct QualCost
|
|
|
|
{
|
|
|
|
Cost startup; /* one-time cost */
|
|
|
|
Cost per_tuple; /* per-evaluation cost */
|
|
|
|
} QualCost;
|
|
|
|
|
2011-04-24 22:55:20 +02:00
|
|
|
/*
|
|
|
|
* Costing aggregate function execution requires these statistics about
|
Support ordered-set (WITHIN GROUP) aggregates.
This patch introduces generic support for ordered-set and hypothetical-set
aggregate functions, as well as implementations of the instances defined in
SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(),
percent_rank(), cume_dist()). We also added mode() though it is not in the
spec, as well as versions of percentile_cont() and percentile_disc() that
can compute multiple percentile values in one pass over the data.
Unlike the original submission, this patch puts full control of the sorting
process in the hands of the aggregate's support functions. To allow the
support functions to find out how they're supposed to sort, a new API
function AggGetAggref() is added to nodeAgg.c. This allows retrieval of
the aggregate call's Aggref node, which may have other uses beyond the
immediate need. There is also support for ordered-set aggregates to
install cleanup callback functions, so that they can be sure that
infrastructure such as tuplesort objects gets cleaned up.
In passing, make some fixes in the recently-added support for variadic
aggregates, and make some editorial adjustments in the recent FILTER
additions for aggregates. Also, simplify use of IsBinaryCoercible() by
allowing it to succeed whenever the target type is ANY or ANYELEMENT.
It was inconsistent that it dealt with other polymorphic target types
but not these.
Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing,
and rather heavily editorialized upon by Tom Lane
2013-12-23 22:11:35 +01:00
|
|
|
* the aggregates to be executed by a given Agg node. Note that the costs
|
|
|
|
* include the execution costs of the aggregates' argument expressions as
|
2016-06-26 21:55:01 +02:00
|
|
|
* well as the aggregate functions themselves. Also, the fields must be
|
|
|
|
* defined so that initializing the struct to zeroes with memset is correct.
|
2011-04-24 22:55:20 +02:00
|
|
|
*/
|
|
|
|
typedef struct AggClauseCosts
|
|
|
|
{
|
|
|
|
QualCost transCost; /* total per-input-row execution costs */
|
2019-02-10 00:32:23 +01:00
|
|
|
QualCost finalCost; /* total per-aggregated-row costs */
|
2011-04-24 22:55:20 +02:00
|
|
|
Size transitionSpace; /* space for pass-by-ref transition data */
|
|
|
|
} AggClauseCosts;
|
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
/*
|
|
|
|
* This enum identifies the different types of "upper" (post-scan/join)
|
|
|
|
* relations that we might deal with during planning.
|
|
|
|
*/
|
|
|
|
typedef enum UpperRelationKind
|
|
|
|
{
|
|
|
|
UPPERREL_SETOP, /* result of UNION/INTERSECT/EXCEPT, if any */
|
Add a new upper planner relation for partially-aggregated results.
Up until now, we've abused grouped_rel->partial_pathlist as a place to
store partial paths that have been partially aggregate, but that's
really not correct, because a partial path for a relation is supposed
to be one which produces the correct results with the addition of only
a Gather or Gather Merge node, and these paths also require a Finalize
Aggregate step. Instead, add a new partially_group_rel which can hold
either partial paths (which need to be gathered and then have
aggregation finalized) or non-partial paths (which only need to have
aggregation finalized). This allows us to reuse generate_gather_paths
for partially_grouped_rel instead of writing new code, so that this
patch actually basically no net new code while making things cleaner,
simplifying things for pending patches for partition-wise aggregate.
Robert Haas and Jeevan Chalke. The larger patch series of which this
patch is a part was also reviewed and tested by Antonin Houska,
Rajkumar Raghuwanshi, David Rowley, Dilip Kumar, Konstantin Knizhnik,
Pascal Legrand, Rafia Sabih, and me.
Discussion: http://postgr.es/m/CA+TgmobrzFYS3+U8a_BCy3-hOvh5UyJbC18rEcYehxhpw5=ETA@mail.gmail.com
Discussion: http://postgr.es/m/CA+TgmoZyQEjdBNuoG9-wC5GQ5GrO4544Myo13dVptvx+uLg9uQ@mail.gmail.com
2018-02-26 15:30:12 +01:00
|
|
|
UPPERREL_PARTIAL_GROUP_AGG, /* result of partial grouping/aggregation, if
|
|
|
|
* any */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
UPPERREL_GROUP_AGG, /* result of grouping/aggregation, if any */
|
|
|
|
UPPERREL_WINDOW, /* result of window functions, if any */
|
2021-08-22 13:31:16 +02:00
|
|
|
UPPERREL_PARTIAL_DISTINCT, /* result of partial "SELECT DISTINCT", if any */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
UPPERREL_DISTINCT, /* result of "SELECT DISTINCT", if any */
|
|
|
|
UPPERREL_ORDERED, /* result of ORDER BY, if any */
|
|
|
|
UPPERREL_FINAL /* result of any remaining top-level actions */
|
|
|
|
/* NB: UPPERREL_FINAL must be last enum entry; it's used to size arrays */
|
|
|
|
} UpperRelationKind;
|
|
|
|
|
2007-02-19 08:03:34 +01:00
|
|
|
/*----------
|
|
|
|
* PlannerGlobal
|
|
|
|
* Global information for planning/optimization
|
|
|
|
*
|
|
|
|
* PlannerGlobal holds state for an entire planner invocation; this state
|
|
|
|
* is shared across all levels of sub-Queries that exist in the command being
|
|
|
|
* planned.
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
*
|
|
|
|
* Not all fields are printed. (In some cases, there is no print support for
|
2022-07-20 19:54:25 +02:00
|
|
|
* the field type; in others, doing so would lead to infinite recursion.)
|
2007-02-19 08:03:34 +01:00
|
|
|
*----------
|
|
|
|
*/
|
|
|
|
typedef struct PlannerGlobal
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2007-02-19 08:03:34 +01:00
|
|
|
NodeTag type;
|
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* Param values provided to planner() */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
ParamListInfo boundParams pg_node_attr(read_write_ignore);
|
2007-02-19 08:03:34 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* Plans for SubPlan nodes */
|
|
|
|
List *subplans;
|
2007-02-22 23:00:26 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* PlannerInfos for SubPlan nodes */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
List *subroots pg_node_attr(read_write_ignore);
|
2009-10-12 20:10:51 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* indices of subplans that require REWIND */
|
|
|
|
Bitmapset *rewindPlanIDs;
|
2007-02-27 02:11:26 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* "flat" rangetable for executor */
|
|
|
|
List *finalrtable;
|
2007-09-20 19:56:33 +02:00
|
|
|
|
2022-12-06 16:09:24 +01:00
|
|
|
/* "flat" list of RTEPermissionInfos */
|
|
|
|
List *finalrteperminfos;
|
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* "flat" list of PlanRowMarks */
|
|
|
|
List *finalrowmarks;
|
2009-10-12 20:10:51 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* "flat" list of integer RT indexes */
|
|
|
|
List *resultRelations;
|
2017-03-21 14:48:04 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* "flat" list of AppendRelInfos */
|
|
|
|
List *appendRelations;
|
Further adjust EXPLAIN's choices of table alias names.
This patch causes EXPLAIN to always assign a separate table alias to the
parent RTE of an append relation (inheritance set); before, such RTEs
were ignored if not actually scanned by the plan. Since the child RTEs
now always have that same alias to start with (cf. commit 55a1954da),
the net effect is that the parent RTE usually gets the alias used or
implied by the query text, and the children all get that alias with "_N"
appended. (The exception to "usually" is if there are duplicate aliases
in different subtrees of the original query; then some of those original
RTEs will also have "_N" appended.)
This results in more uniform output for partitioned-table plans than
we had before: the partitioned table itself gets the original alias,
and all child tables have aliases with "_N", rather than the previous
behavior where one of the children would get an alias without "_N".
The reason for giving the parent RTE an alias, even if it isn't scanned
by the plan, is that we now use the parent's alias to qualify Vars that
refer to an appendrel output column and appear above the Append or
MergeAppend that computes the appendrel. But below the append, Vars
refer to some one of the child relations, and are displayed that way.
This seems clearer than the old behavior where a Var that could carry
values from any child relation was displayed as if it referred to only
one of them.
While at it, change ruleutils.c so that the code paths used by EXPLAIN
deal in Plan trees not PlanState trees. This effectively reverts a
decision made in commit 1cc29fe7c, which seemed like a good idea at
the time to make ruleutils.c consistent with explain.c. However,
it's problematic because we'd really like to allow executor startup
pruning to remove all the children of an append node when possible,
leaving no child PlanState to resolve Vars against. (That's not done
here, but will be in the next patch.) This requires different handling
of subplans and initplans than before, but is otherwise a pretty
straightforward change.
Discussion: https://postgr.es/m/001001d4f44b$2a2cca50$7e865ef0$@lab.ntt.co.jp
2019-12-11 23:05:18 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* OIDs of relations the plan depends on */
|
|
|
|
List *relationOids;
|
2007-10-11 20:05:27 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* other dependencies, as PlanInvalItems */
|
|
|
|
List *invalItems;
|
2008-09-09 20:58:09 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* type OIDs for PARAM_EXEC Params */
|
|
|
|
List *paramExecTypes;
|
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.
The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries. This was (probably) safe before the introduction of
CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE. In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.
To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter. The planning code is
simpler and probably faster than before, as well as being more correct.
Per report from Vik Reykja.
These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* highest PlaceHolderVar ID assigned */
|
|
|
|
Index lastPHId;
|
2008-10-21 22:42:53 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* highest PlanRowMark ID assigned */
|
|
|
|
Index lastRowMarkId;
|
2011-02-10 05:27:07 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* highest plan node ID assigned */
|
|
|
|
int lastPlanNodeId;
|
2015-09-29 03:55:57 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* redo plan when TransactionXmin changes? */
|
|
|
|
bool transientPlan;
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* is plan specific to current role? */
|
|
|
|
bool dependsOnRole;
|
2015-09-16 21:38:47 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* parallel mode potentially OK? */
|
|
|
|
bool parallelModeOK;
|
2015-09-16 21:38:47 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* parallel mode actually required? */
|
|
|
|
bool parallelModeNeeded;
|
2016-08-19 20:03:07 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* worst PROPARALLEL hazard level */
|
|
|
|
char maxParallelHazard;
|
Allow ATTACH PARTITION with only ShareUpdateExclusiveLock.
We still require AccessExclusiveLock on the partition itself, because
otherwise an insert that violates the newly-imposed partition
constraint could be in progress at the same time that we're changing
that constraint; only the lock level on the parent relation is
weakened.
To make this safe, we have to cope with (at least) three separate
problems. First, relevant DDL might commit while we're in the process
of building a PartitionDesc. If so, find_inheritance_children() might
see a new partition while the RELOID system cache still has the old
partition bound cached, and even before invalidation messages have
been queued. To fix that, if we see that the pg_class tuple seems to
be missing or to have a null relpartbound, refetch the value directly
from the table. We can't get the wrong value, because DETACH PARTITION
still requires AccessExclusiveLock throughout; if we ever want to
change that, this will need more thought. In testing, I found it quite
difficult to hit even the null-relpartbound case; the race condition
is extremely tight, but the theoretical risk is there.
Second, successive calls to RelationGetPartitionDesc might not return
the same answer. The query planner will get confused if lookup up the
PartitionDesc for a particular relation does not return a consistent
answer for the entire duration of query planning. Likewise, query
execution will get confused if the same relation seems to have a
different PartitionDesc at different times. Invent a new
PartitionDirectory concept and use it to ensure consistency. This
ensures that a single invocation of either the planner or the executor
sees the same view of the PartitionDesc from beginning to end, but it
does not guarantee that the planner and the executor see the same
view. Since this allows pointers to old PartitionDesc entries to
survive even after a relcache rebuild, also postpone removing the old
PartitionDesc entry until we're certain no one is using it.
For the most part, it seems to be OK for the planner and executor to
have different views of the PartitionDesc, because the executor will
just ignore any concurrently added partitions which were unknown at
plan time; those partitions won't be part of the inheritance
expansion, but invalidation messages will trigger replanning at some
point. Normally, this happens by the time the very next command is
executed, but if the next command acquires no locks and executes a
prepared query, it can manage not to notice until a new transaction is
started. We might want to tighten that up, but it's material for a
separate patch. There would still be a small window where a query
that started just after an ATTACH PARTITION command committed might
fail to notice its results -- but only if the command starts before
the commit has been acknowledged to the user. All in all, the warts
here around serializability seem small enough to be worth accepting
for the considerable advantage of being able to add partitions without
a full table lock.
Although in general the consequences of new partitions showing up
between planning and execution are limited to the query not noticing
the new partitions, run-time partition pruning will get confused in
that case, so that's the third problem that this patch fixes.
Run-time partition pruning assumes that indexes into the PartitionDesc
are stable between planning and execution. So, add code so that if
new partitions are added between plan time and execution time, the
indexes stored in the subplan_map[] and subpart_map[] arrays within
the plan's PartitionedRelPruneInfo get adjusted accordingly. There
does not seem to be a simple way to generalize this scheme to cope
with partitions that are removed, mostly because they could then get
added back again with different bounds, but it works OK for added
partitions.
This code does not try to ensure that every backend participating in
a parallel query sees the same view of the PartitionDesc. That
currently doesn't matter, because we never pass PartitionDesc
indexes between backends. Each backend will ignore the concurrently
added partitions which it notices, and it doesn't matter if different
backends are ignoring different sets of concurrently added partitions.
If in the future that matters, for example because we allow writes in
parallel query and want all participants to do tuple routing to the same
set of partitions, the PartitionDirectory concept could be improved to
share PartitionDescs across backends. There is a draft patch to
serialize and restore PartitionDescs on the thread where this patch
was discussed, which may be a useful place to start.
Patch by me. Thanks to Alvaro Herrera, David Rowley, Simon Riggs,
Amit Langote, and Michael Paquier for discussion, and to Alvaro
Herrera for some review.
Discussion: http://postgr.es/m/CA+Tgmobt2upbSocvvDej3yzokd7AkiT+PvgFH+a9-5VV1oJNSQ@mail.gmail.com
Discussion: http://postgr.es/m/CA+TgmoZE0r9-cyA-aY6f8WFEROaDLLL7Vf81kZ8MtFCkxpeQSw@mail.gmail.com
Discussion: http://postgr.es/m/CA+TgmoY13KQZF-=HNTrt9UYWYx3_oYOQpu9ioNT49jGgiDpUEA@mail.gmail.com
2019-03-07 17:13:12 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* partition descriptors */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
PartitionDirectory partition_directory pg_node_attr(read_write_ignore);
|
2007-02-19 08:03:34 +01:00
|
|
|
} PlannerGlobal;
|
|
|
|
|
2007-02-22 23:00:26 +01:00
|
|
|
/* macro for fetching the Plan associated with a SubPlan node */
|
|
|
|
#define planner_subplan_get_plan(root, subplan) \
|
|
|
|
((Plan *) list_nth((root)->glob->subplans, (subplan)->plan_id - 1))
|
|
|
|
|
2007-02-19 08:03:34 +01:00
|
|
|
|
2005-06-06 00:32:58 +02:00
|
|
|
/*----------
|
|
|
|
* PlannerInfo
|
|
|
|
* Per-query information for planning/optimization
|
|
|
|
*
|
|
|
|
* This struct is conventionally called "root" in all the planner routines.
|
|
|
|
* It holds links to all of the planner's working state, in addition to the
|
2006-01-31 22:39:25 +01:00
|
|
|
* original Query. Note that at present the planner extensively modifies
|
2005-06-06 00:32:58 +02:00
|
|
|
* the passed-in Query data structure; someday that should stop.
|
2019-01-29 21:48:51 +01:00
|
|
|
*
|
|
|
|
* For reasons explained in optimizer/optimizer.h, we define the typedef
|
|
|
|
* either here or in that header, whichever is read first.
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
*
|
|
|
|
* Not all fields are printed. (In some cases, there is no print support for
|
2022-07-20 19:54:25 +02:00
|
|
|
* the field type; in others, doing so would lead to infinite recursion or
|
|
|
|
* bloat dump output more than seems useful.)
|
2005-06-06 00:32:58 +02:00
|
|
|
*----------
|
|
|
|
*/
|
2019-01-29 21:48:51 +01:00
|
|
|
#ifndef HAVE_PLANNERINFO_TYPEDEF
|
|
|
|
typedef struct PlannerInfo PlannerInfo;
|
|
|
|
#define HAVE_PLANNERINFO_TYPEDEF 1
|
|
|
|
#endif
|
2018-06-26 16:35:26 +02:00
|
|
|
|
2019-01-29 21:48:51 +01:00
|
|
|
struct PlannerInfo
|
2005-06-06 00:32:58 +02:00
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2005-06-06 00:32:58 +02:00
|
|
|
NodeTag type;
|
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* the Query being planned */
|
|
|
|
Query *parse;
|
2005-06-06 00:32:58 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* global info for current planner run */
|
|
|
|
PlannerGlobal *glob;
|
2007-02-19 08:03:34 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* 1 at the outermost Query */
|
|
|
|
Index query_level;
|
2007-02-19 08:03:34 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* NULL at outermost Query */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
PlannerInfo *parent_root pg_node_attr(read_write_ignore);
|
2008-10-04 23:56:55 +02:00
|
|
|
|
2015-08-12 05:48:37 +02:00
|
|
|
/*
|
|
|
|
* plan_params contains the expressions that this query level needs to
|
|
|
|
* make available to a lower query level that is currently being planned.
|
|
|
|
* outer_params contains the paramIds of PARAM_EXEC Params that outer
|
|
|
|
* query levels will make available to this query level.
|
|
|
|
*/
|
2022-07-08 09:22:27 +02:00
|
|
|
/* list of PlannerParamItems, see below */
|
|
|
|
List *plan_params;
|
2015-08-12 05:48:37 +02:00
|
|
|
Bitmapset *outer_params;
|
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.
The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries. This was (probably) safe before the introduction of
CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE. In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.
To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter. The planning code is
simpler and probably faster than before, as well as being more correct.
Per report from Vik Reykja.
These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
|
|
|
|
2005-06-06 06:13:36 +02:00
|
|
|
/*
|
2006-01-31 22:39:25 +01:00
|
|
|
* simple_rel_array holds pointers to "base rels" and "other rels" (see
|
2005-06-06 06:13:36 +02:00
|
|
|
* comments for RelOptInfo for more info). It is indexed by rangetable
|
|
|
|
* index (so entry 0 is always wasted). Entries can be NULL when an RTE
|
2006-01-31 22:39:25 +01:00
|
|
|
* does not correspond to a base relation, such as a join RTE or an
|
|
|
|
* unreferenced view RTE; or if the RelOptInfo hasn't been made yet.
|
2005-06-06 06:13:36 +02:00
|
|
|
*/
|
2022-07-20 19:54:25 +02:00
|
|
|
struct RelOptInfo **simple_rel_array pg_node_attr(array_size(simple_rel_array_size));
|
2022-07-08 09:22:27 +02:00
|
|
|
/* allocated size of array */
|
2022-07-20 19:54:25 +02:00
|
|
|
int simple_rel_array_size;
|
2005-06-06 06:13:36 +02:00
|
|
|
|
2007-04-21 23:01:45 +02:00
|
|
|
/*
|
|
|
|
* simple_rte_array is the same length as simple_rel_array and holds
|
2019-08-09 18:33:43 +02:00
|
|
|
* pointers to the associated rangetable entries. Using this is a shade
|
2022-07-20 19:54:25 +02:00
|
|
|
* faster than using rt_fetch(), mostly due to fewer indirections. (Not
|
|
|
|
* printed because it'd be redundant with parse->rtable.)
|
2007-04-21 23:01:45 +02:00
|
|
|
*/
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
RangeTblEntry **simple_rte_array pg_node_attr(read_write_ignore);
|
2007-04-21 23:01:45 +02:00
|
|
|
|
2018-06-26 16:35:26 +02:00
|
|
|
/*
|
2018-06-27 21:40:24 +02:00
|
|
|
* append_rel_array is the same length as the above arrays, and holds
|
2018-06-26 16:35:26 +02:00
|
|
|
* pointers to the corresponding AppendRelInfo entry indexed by
|
2019-08-09 18:33:43 +02:00
|
|
|
* child_relid, or NULL if the rel is not an appendrel child. The array
|
2022-07-20 19:54:25 +02:00
|
|
|
* itself is not allocated if append_rel_list is empty. (Not printed
|
|
|
|
* because it'd be redundant with append_rel_list.)
|
2018-06-26 16:35:26 +02:00
|
|
|
*/
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
struct AppendRelInfo **append_rel_array pg_node_attr(read_write_ignore);
|
2018-06-26 16:35:26 +02:00
|
|
|
|
2012-01-28 01:26:38 +01:00
|
|
|
/*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* all_baserels is a Relids set of all base relids (but not joins or
|
|
|
|
* "other" rels) in the query. This is computed in deconstruct_jointree.
|
2012-01-28 01:26:38 +01:00
|
|
|
*/
|
|
|
|
Relids all_baserels;
|
|
|
|
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/*
|
|
|
|
* outer_join_rels is a Relids set of all outer-join relids in the query.
|
|
|
|
* This is computed in deconstruct_jointree.
|
|
|
|
*/
|
|
|
|
Relids outer_join_rels;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* all_query_rels is a Relids set of all base relids and outer join relids
|
|
|
|
* (but not "other" relids) in the query. This is the Relids identifier
|
|
|
|
* of the final join we need to form. This is computed in
|
|
|
|
* deconstruct_jointree.
|
|
|
|
*/
|
|
|
|
Relids all_query_rels;
|
|
|
|
|
2005-06-09 01:02:05 +02:00
|
|
|
/*
|
|
|
|
* join_rel_list is a list of all join-relation RelOptInfos we have
|
|
|
|
* considered in this planning run. For small problems we just scan the
|
|
|
|
* list to do lookups, but when there are many join relations we build a
|
|
|
|
* hash table for faster lookups. The hash table is present and valid
|
|
|
|
* when join_rel_hash is not NULL. Note that we still maintain the list
|
|
|
|
* even when using the hash table for lookups; this simplifies life for
|
|
|
|
* GEQO.
|
|
|
|
*/
|
2022-07-02 12:33:07 +02:00
|
|
|
List *join_rel_list;
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
struct HTAB *join_rel_hash pg_node_attr(read_write_ignore);
|
2005-06-06 00:32:58 +02:00
|
|
|
|
2009-11-28 01:46:19 +01:00
|
|
|
/*
|
|
|
|
* When doing a dynamic-programming-style join search, join_rel_level[k]
|
|
|
|
* is a list of all join-relation RelOptInfos of level k, and
|
|
|
|
* join_cur_level is the current level. New join-relation RelOptInfos are
|
|
|
|
* automatically added to the join_rel_level[join_cur_level] list.
|
|
|
|
* join_rel_level is NULL if not in use.
|
2022-07-20 19:54:25 +02:00
|
|
|
*
|
|
|
|
* Note: we've already printed all baserel and joinrel RelOptInfos above,
|
|
|
|
* so we don't dump join_rel_level or other lists of RelOptInfos.
|
2009-11-28 01:46:19 +01:00
|
|
|
*/
|
2022-07-08 09:22:27 +02:00
|
|
|
/* lists of join-relation RelOptInfos */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
List **join_rel_level pg_node_attr(read_write_ignore);
|
2022-07-08 09:22:27 +02:00
|
|
|
/* index of list being extended */
|
|
|
|
int join_cur_level;
|
2009-11-28 01:46:19 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* init SubPlans for query */
|
|
|
|
List *init_plans;
|
2008-10-04 23:56:55 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/*
|
|
|
|
* per-CTE-item list of subplan IDs (or -1 if no subplan was made for that
|
|
|
|
* CTE)
|
|
|
|
*/
|
|
|
|
List *cte_plan_ids;
|
2007-02-19 08:03:34 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* List of Lists of Params for MULTIEXPR subquery outputs */
|
|
|
|
List *multiexpr_params;
|
Implement UPDATE tab SET (col1,col2,...) = (SELECT ...), ...
This SQL-standard feature allows a sub-SELECT yielding multiple columns
(but only one row) to be used to compute the new values of several columns
to be updated. While the same results can be had with an independent
sub-SELECT per column, such a workaround can require a great deal of
duplicated computation.
The standard actually says that the source for a multi-column assignment
could be any row-valued expression. The implementation used here is
tightly tied to our existing sub-SELECT support and can't handle other
cases; the Bison grammar would have some issues with them too. However,
I don't feel too bad about this since other cases can be converted into
sub-SELECTs. For instance, "SET (a,b,c) = row_valued_function(x)" could
be written "SET (a,b,c) = (SELECT * FROM row_valued_function(x))".
2014-06-18 19:22:25 +02:00
|
|
|
|
2023-01-30 19:50:25 +01:00
|
|
|
/* list of JoinDomains used in the query (higher ones first) */
|
|
|
|
List *join_domains;
|
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* list of active EquivalenceClasses */
|
|
|
|
List *eq_classes;
|
2005-06-06 00:32:58 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* set true once ECs are canonical */
|
|
|
|
bool ec_merging_done;
|
Speed up finding EquivalenceClasses for a given set of rels
Previously in order to determine which ECs a relation had members in, we
had to loop over all ECs stored in PlannerInfo's eq_classes and check if
ec_relids mentioned the relation. For the most part, this was fine, as
generally, unless queries were fairly complex, the overhead of performing
the lookup would have not been that significant. However, when queries
contained large numbers of joins and ECs, the overhead to find the set of
classes matching a given set of relations could become a significant
portion of the overall planning effort.
Here we allow a much more efficient method to access the ECs which match a
given relation or set of relations. A new Bitmapset field in RelOptInfo
now exists to store the indexes into PlannerInfo's eq_classes list which
each relation is mentioned in. This allows very fast lookups to find all
ECs belonging to a single relation. When we need to lookup ECs belonging
to a given pair of relations, we can simply bitwise-AND the Bitmapsets from
each relation and use the result to perform the lookup.
We also take the opportunity to write a new implementation of
generate_join_implied_equalities which makes use of the new indexes.
generate_join_implied_equalities_for_ecs must remain as is as it can be
given a custom list of ECs, which we can't easily determine the indexes of.
This was originally intended to fix the performance penalty of looking up
foreign keys matching a join condition which was introduced by 100340e2d.
However, we're speeding up much more than just that here.
Author: David Rowley, Tom Lane
Reviewed-by: Tom Lane, Tomas Vondra
Discussion: https://postgr.es/m/6970.1545327857@sss.pgh.pa.us
2019-07-21 07:30:58 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* list of "canonical" PathKeys */
|
|
|
|
List *canon_pathkeys;
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* list of OuterJoinClauseInfos for mergejoinable outer join clauses
|
2022-07-08 09:22:27 +02:00
|
|
|
* w/nonnullable var on left
|
|
|
|
*/
|
|
|
|
List *left_join_clauses;
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* list of OuterJoinClauseInfos for mergejoinable outer join clauses
|
2022-07-08 09:22:27 +02:00
|
|
|
* w/nonnullable var on right
|
|
|
|
*/
|
|
|
|
List *right_join_clauses;
|
2007-01-20 21:45:41 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* list of OuterJoinClauseInfos for mergejoinable full join clauses
|
2022-07-08 09:22:27 +02:00
|
|
|
*/
|
|
|
|
List *full_join_clauses;
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* list of SpecialJoinInfos */
|
|
|
|
List *join_info_list;
|
2012-08-27 04:48:55 +02:00
|
|
|
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/* counter for assigning RestrictInfo serial numbers */
|
|
|
|
int last_rinfo_serial;
|
|
|
|
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
/*
|
|
|
|
* all_result_relids is empty for SELECT, otherwise it contains at least
|
2022-10-24 12:52:43 +02:00
|
|
|
* parse->resultRelation. For UPDATE/DELETE/MERGE across an inheritance
|
|
|
|
* or partitioning tree, the result rel's child relids are added. When
|
|
|
|
* using multi-level partitioning, intermediate partitioned rels are
|
|
|
|
* included. leaf_result_relids is similar except that only actual result
|
|
|
|
* tables, not partitioned tables, are included in it.
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
*/
|
2022-07-08 09:22:27 +02:00
|
|
|
/* set of all result relids */
|
|
|
|
Relids all_result_relids;
|
|
|
|
/* set of all leaf relids */
|
|
|
|
Relids leaf_result_relids;
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
|
Use Append rather than MergeAppend for scanning ordered partitions.
If we need ordered output from a scan of a partitioned table, but
the ordering matches the partition ordering, then we don't need to
use a MergeAppend to combine the pre-ordered per-partition scan
results: a plain Append will produce the same results. This
both saves useless comparison work inside the MergeAppend proper,
and allows us to start returning tuples after istarting up just
the first child node not all of them.
However, all is not peaches and cream, because if some of the
child nodes have high startup costs then there will be big
discontinuities in the tuples-returned-versus-elapsed-time curve.
The planner's cost model cannot handle that (yet, anyway).
If we model the Append's startup cost as being just the first
child's startup cost, we may drastically underestimate the cost
of fetching slightly more tuples than are available from the first
child. Since we've had bad experiences with over-optimistic choices
of "fast start" plans for ORDER BY LIMIT queries, that seems scary.
As a klugy workaround, set the startup cost estimate for an ordered
Append to be the sum of its children's startup costs (as MergeAppend
would). This doesn't really describe reality, but it's less likely
to cause a bad plan choice than an underestimated startup cost would.
In practice, the cases where we really care about this optimization
will have child plans that are IndexScans with zero startup cost,
so that the overly conservative estimate is still just zero.
David Rowley, reviewed by Julien Rouhaud and Antonin Houska
Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
2019-04-06 01:20:30 +02:00
|
|
|
/*
|
2022-07-08 09:22:27 +02:00
|
|
|
* list of AppendRelInfos
|
|
|
|
*
|
Use Append rather than MergeAppend for scanning ordered partitions.
If we need ordered output from a scan of a partitioned table, but
the ordering matches the partition ordering, then we don't need to
use a MergeAppend to combine the pre-ordered per-partition scan
results: a plain Append will produce the same results. This
both saves useless comparison work inside the MergeAppend proper,
and allows us to start returning tuples after istarting up just
the first child node not all of them.
However, all is not peaches and cream, because if some of the
child nodes have high startup costs then there will be big
discontinuities in the tuples-returned-versus-elapsed-time curve.
The planner's cost model cannot handle that (yet, anyway).
If we model the Append's startup cost as being just the first
child's startup cost, we may drastically underestimate the cost
of fetching slightly more tuples than are available from the first
child. Since we've had bad experiences with over-optimistic choices
of "fast start" plans for ORDER BY LIMIT queries, that seems scary.
As a klugy workaround, set the startup cost estimate for an ordered
Append to be the sum of its children's startup costs (as MergeAppend
would). This doesn't really describe reality, but it's less likely
to cause a bad plan choice than an underestimated startup cost would.
In practice, the cases where we really care about this optimization
will have child plans that are IndexScans with zero startup cost,
so that the overly conservative estimate is still just zero.
David Rowley, reviewed by Julien Rouhaud and Antonin Houska
Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
2019-04-06 01:20:30 +02:00
|
|
|
* Note: for AppendRelInfos describing partitions of a partitioned table,
|
|
|
|
* we guarantee that partitions that come earlier in the partitioned
|
|
|
|
* table's PartitionDesc will appear earlier in append_rel_list.
|
|
|
|
*/
|
2022-07-08 09:22:27 +02:00
|
|
|
List *append_rel_list;
|
2006-01-31 22:39:25 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* list of RowIdentityVarInfos */
|
|
|
|
List *row_identity_vars;
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* list of PlanRowMarks */
|
|
|
|
List *rowMarks;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* list of PlaceHolderInfos */
|
|
|
|
List *placeholder_list;
|
2008-10-21 22:42:53 +02:00
|
|
|
|
2022-08-17 21:35:51 +02:00
|
|
|
/* array of PlaceHolderInfos indexed by phid */
|
|
|
|
struct PlaceHolderInfo **placeholder_array pg_node_attr(read_write_ignore, array_size(placeholder_array_size));
|
|
|
|
/* allocated size of array */
|
|
|
|
int placeholder_array_size pg_node_attr(read_write_ignore);
|
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* list of ForeignKeyOptInfos */
|
|
|
|
List *fkey_list;
|
2016-06-18 21:22:34 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* desired pathkeys for query_planner() */
|
|
|
|
List *query_pathkeys;
|
2005-06-06 00:32:58 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* groupClause pathkeys, if any */
|
|
|
|
List *group_pathkeys;
|
Improve performance of ORDER BY / DISTINCT aggregates
ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been
executed by always performing a sort in nodeAgg.c to sort the tuples in
the current group into the correct order before calling the transition
function on the sorted tuples. This was not great as often there might be
an index that could have provided pre-sorted input and allowed the
transition functions to be called as the rows come in, rather than having
to store them in a tuplestore in order to sort them once all the tuples
for the group have arrived.
Here we change the planner so it requests a path with a sort order which
supports the most amount of ORDER BY / DISTINCT aggregate functions and
add new code to the executor to allow it to support the processing of
ORDER BY / DISTINCT aggregates where the tuples are already sorted in the
correct order.
Since there can be many ORDER BY / DISTINCT aggregates in any given query
level, it's very possible that we can't find an order that suits all of
these aggregates. The sort order that the planner chooses is simply the
one that suits the most aggregate functions. We take the most strictly
sorted variation of each order and see how many aggregate functions can
use that, then we try again with the order of the remaining aggregates to
see if another order would suit more aggregate functions. For example:
SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ...
would request the sort order to be {a, b} because {a} is a subset of the
sort order of {a,b}, but;
SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ...
would just pick a plan ordered by {a} (we give precedence to aggregates
which are earlier in the targetlist).
SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ...
would choose to order by {b} since two aggregates suit that vs just one
that requires input ordered by {a}.
Author: David Rowley
Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane
Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The number of elements in the group_pathkeys list which belong to the
|
|
|
|
* GROUP BY clause. Additional ones belong to ORDER BY / DISTINCT
|
|
|
|
* aggregates.
|
|
|
|
*/
|
|
|
|
int num_groupby_pathkeys;
|
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* pathkeys of bottom window, if any */
|
|
|
|
List *window_pathkeys;
|
|
|
|
/* distinctClause pathkeys, if any */
|
|
|
|
List *distinct_pathkeys;
|
|
|
|
/* sortClause pathkeys, if any */
|
|
|
|
List *sort_pathkeys;
|
2005-08-28 00:13:44 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* Canonicalised partition schemes used in the query. */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
List *part_schemes pg_node_attr(read_write_ignore);
|
2017-09-21 05:33:04 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* RelOptInfos we are now trying to join */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
List *initial_rels pg_node_attr(read_write_ignore);
|
2008-01-11 05:02:18 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/*
|
|
|
|
* Upper-rel RelOptInfos. Use fetch_upper_rel() to get any particular
|
|
|
|
* upper rel.
|
|
|
|
*/
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
List *upper_rels[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore);
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
|
2016-03-15 00:23:29 +01:00
|
|
|
/* Result tlists chosen by grouping_planner for upper-stage processing */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
struct PathTarget *upper_targets[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore);
|
2016-03-15 00:23:29 +01:00
|
|
|
|
Remove redundant grouping and DISTINCT columns.
Avoid explicitly grouping by columns that we know are redundant
for sorting, for example we need group by only one of x and y in
SELECT ... WHERE x = y GROUP BY x, y
This comes up more often than you might think, as shown by the
changes in the regression tests. It's nearly free to detect too,
since we are just piggybacking on the existing logic that detects
redundant pathkeys. (In some of the existing plans that change,
it's visible that a sort step preceding the grouping step already
didn't bother to sort by the redundant column, making the old plan
a bit silly-looking.)
To do this, build processed_groupClause and processed_distinctClause
lists that omit any provably-redundant sort items, and consult those
not the originals where relevant. This means that within the
planner, one should usually consult root->processed_groupClause or
root->processed_distinctClause if one wants to know which columns
are to be grouped on; but to check whether grouping or distinct-ing
is happening at all, check non-NIL-ness of parse->groupClause or
parse->distinctClause. This is comparable to longstanding rules
about handling the HAVING clause, so I don't think it'll be a huge
maintenance problem.
nodeAgg.c also needs minor mods, because it's now possible to generate
AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns.
Patch by me; thanks to Richard Guo and David Rowley for review.
Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2023-01-18 18:37:57 +01:00
|
|
|
/*
|
|
|
|
* The fully-processed groupClause is kept here. It differs from
|
|
|
|
* parse->groupClause in that we remove any items that we can prove
|
|
|
|
* redundant, so that only the columns named here actually need to be
|
|
|
|
* compared to determine grouping. Note that it's possible for *all* the
|
|
|
|
* items to be proven redundant, implying that there is only one group
|
|
|
|
* containing all the query's rows. Hence, if you want to check whether
|
|
|
|
* GROUP BY was specified, test for nonempty parse->groupClause, not for
|
|
|
|
* nonempty processed_groupClause.
|
|
|
|
*
|
|
|
|
* Currently, when grouping sets are specified we do not attempt to
|
|
|
|
* optimize the groupClause, so that processed_groupClause will be
|
|
|
|
* identical to parse->groupClause.
|
|
|
|
*/
|
|
|
|
List *processed_groupClause;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The fully-processed distinctClause is kept here. It differs from
|
|
|
|
* parse->distinctClause in that we remove any items that we can prove
|
|
|
|
* redundant, so that only the columns named here actually need to be
|
|
|
|
* compared to determine uniqueness. Note that it's possible for *all*
|
|
|
|
* the items to be proven redundant, implying that there should be only
|
|
|
|
* one output row. Hence, if you want to check whether DISTINCT was
|
|
|
|
* specified, test for nonempty parse->distinctClause, not for nonempty
|
|
|
|
* processed_distinctClause.
|
|
|
|
*/
|
|
|
|
List *processed_distinctClause;
|
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
/*
|
Avoid passing query tlist around separately from root->processed_tlist.
In the dim past, the planner kept the fully-processed version of the query
targetlist (the result of preprocess_targetlist) in grouping_planner's
local variable "tlist", and only grudgingly passed it to individual other
routines as needed. Later we discovered a need to still have it available
after grouping_planner finishes, and invented the root->processed_tlist
field for that purpose, but it wasn't used internally to grouping_planner;
the tlist was still being passed around separately in the same places as
before.
Now comes a proposed patch to allow appendrel expansion to add entries
to the processed tlist, well after preprocess_targetlist has finished
its work. To avoid having to pass around the tlist explicitly, it's
proposed to allow appendrel expansion to modify root->processed_tlist.
That makes aliasing the tlist with assorted parameters and local
variables really scary. It would accidentally work as long as the
tlist is initially nonempty, because then the List header won't move
around, but it's not exactly hard to think of ways for that to break.
Aliased values are poor programming practice anyway.
Hence, get rid of local variables and parameters that can be identified
with root->processed_tlist, in favor of just using that field directly.
And adjust comments to match. (Some of the new comments speak as though
it's already possible for appendrel expansion to modify the tlist; that's
not true yet, but will happen in a later patch.)
Discussion: https://postgr.es/m/9d7c5112-cb99-6a47-d3be-cf1ee6862a1d@lab.ntt.co.jp
2019-03-27 17:57:41 +01:00
|
|
|
* The fully-processed targetlist is kept here. It differs from
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
* parse->targetList in that (for INSERT) it's been reordered to match the
|
|
|
|
* target table, and defaults have been filled in. Also, additional
|
|
|
|
* resjunk targets may be present. preprocess_targetlist() does most of
|
|
|
|
* that work, but note that more resjunk targets can get added during
|
|
|
|
* appendrel expansion. (Hence, upper_targets mustn't get set up till
|
|
|
|
* after that.)
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
*/
|
|
|
|
List *processed_tlist;
|
|
|
|
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
/*
|
|
|
|
* For UPDATE, this list contains the target table's attribute numbers to
|
|
|
|
* which the first N entries of processed_tlist are to be assigned. (Any
|
|
|
|
* additional entries in processed_tlist must be resjunk.) DO NOT use the
|
|
|
|
* resnos in processed_tlist to identify the UPDATE target columns.
|
|
|
|
*/
|
|
|
|
List *update_colnos;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* Fields filled during create_plan() for use in setrefs.c
|
|
|
|
*/
|
2022-07-20 19:54:25 +02:00
|
|
|
/* for GroupingFunc fixup (can't print: array length not known here) */
|
|
|
|
AttrNumber *grouping_map pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* List of MinMaxAggInfos */
|
|
|
|
List *minmax_aggs;
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* context holding PlannerInfo */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
MemoryContext planner_cxt pg_node_attr(read_write_ignore);
|
2007-01-20 21:45:41 +01:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* # of pages in all non-dummy tables of query */
|
|
|
|
Cardinality total_table_pages;
|
2006-09-20 00:49:53 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/* tuple_fraction passed to query_planner */
|
|
|
|
Selectivity tuple_fraction;
|
|
|
|
/* limit_tuples passed to query_planner */
|
|
|
|
Cardinality limit_tuples;
|
2005-06-10 05:32:25 +02:00
|
|
|
|
2022-07-08 09:22:27 +02:00
|
|
|
/*
|
|
|
|
* Minimum security_level for quals. Note: qual_security_level is zero if
|
|
|
|
* there are no securityQuals.
|
|
|
|
*/
|
|
|
|
Index qual_security_level;
|
|
|
|
|
|
|
|
/* true if any RTEs are RTE_JOIN kind */
|
|
|
|
bool hasJoinRTEs;
|
|
|
|
/* true if any RTEs are marked LATERAL */
|
|
|
|
bool hasLateralRTEs;
|
|
|
|
/* true if havingQual was non-null */
|
|
|
|
bool hasHavingQual;
|
|
|
|
/* true if any RestrictInfo has pseudoconstant = true */
|
|
|
|
bool hasPseudoConstantQuals;
|
|
|
|
/* true if we've made any of those */
|
|
|
|
bool hasAlternativeSubPlans;
|
2022-08-17 21:52:53 +02:00
|
|
|
/* true once we're no longer allowed to add PlaceHolderInfos */
|
|
|
|
bool placeholdersFrozen;
|
2022-07-08 09:22:27 +02:00
|
|
|
/* true if planning a recursive WITH item */
|
|
|
|
bool hasRecursion;
|
2008-10-04 23:56:55 +02:00
|
|
|
|
2020-11-24 09:45:00 +01:00
|
|
|
/*
|
|
|
|
* Information about aggregates. Filled by preprocess_aggrefs().
|
|
|
|
*/
|
2022-07-08 09:22:27 +02:00
|
|
|
/* AggInfo structs */
|
2022-07-19 18:29:37 +02:00
|
|
|
List *agginfos;
|
2022-07-08 09:22:27 +02:00
|
|
|
/* AggTransInfo structs */
|
2022-07-19 18:29:37 +02:00
|
|
|
List *aggtransinfos;
|
|
|
|
/* number of aggs with DISTINCT/ORDER BY/WITHIN GROUP */
|
|
|
|
int numOrderedAggs;
|
2022-07-08 09:22:27 +02:00
|
|
|
/* does any agg not support partial mode? */
|
2022-07-19 18:29:37 +02:00
|
|
|
bool hasNonPartialAggs;
|
2022-07-08 09:22:27 +02:00
|
|
|
/* is any partial agg non-serializable? */
|
2022-07-19 18:29:37 +02:00
|
|
|
bool hasNonSerialAggs;
|
2022-07-08 09:22:27 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* These fields are used only when hasRecursion is true:
|
|
|
|
*/
|
|
|
|
/* PARAM_EXEC ID for the work table */
|
|
|
|
int wt_param_id;
|
|
|
|
/* a path for non-recursive term */
|
2022-07-20 19:54:25 +02:00
|
|
|
struct Path *non_recursive_path;
|
2022-07-08 09:22:27 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* These fields are workspace for createplan.c
|
|
|
|
*/
|
|
|
|
/* outer rels above current node */
|
|
|
|
Relids curOuterRels;
|
|
|
|
/* not-yet-assigned NestLoopParams */
|
|
|
|
List *curOuterParams;
|
2010-07-12 19:01:06 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* These fields are workspace for setrefs.c. Each is an array
|
2022-07-20 19:54:25 +02:00
|
|
|
* corresponding to glob->subplans. (We could probably teach
|
|
|
|
* gen_node_support.pl how to determine the array length, but it doesn't
|
|
|
|
* seem worth the trouble, so just mark them read_write_ignore.)
|
2022-07-02 12:33:07 +02:00
|
|
|
*/
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
bool *isAltSubplan pg_node_attr(read_write_ignore);
|
|
|
|
bool *isUsedSubplan pg_node_attr(read_write_ignore);
|
2021-09-14 21:11:21 +02:00
|
|
|
|
2009-07-16 22:55:44 +02:00
|
|
|
/* optional private data for join_search_hook, e.g., GEQO */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
void *join_search_private pg_node_attr(read_write_ignore);
|
Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning. The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.
At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.
This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.
Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
2018-04-06 21:23:04 +02:00
|
|
|
|
|
|
|
/* Does this query modify any partition key columns? */
|
|
|
|
bool partColsUpdated;
|
2019-01-29 21:48:51 +01:00
|
|
|
};
|
2005-06-06 00:32:58 +02:00
|
|
|
|
|
|
|
|
2007-04-21 23:01:45 +02:00
|
|
|
/*
|
|
|
|
* In places where it's known that simple_rte_array[] must have been prepared
|
|
|
|
* already, we just index into it to fetch RTEs. In code that might be
|
|
|
|
* executed before or after entering query_planner(), use this macro.
|
|
|
|
*/
|
|
|
|
#define planner_rt_fetch(rti, root) \
|
|
|
|
((root)->simple_rte_array ? (root)->simple_rte_array[rti] : \
|
|
|
|
rt_fetch(rti, (root)->parse->rtable))
|
|
|
|
|
2017-09-21 05:33:04 +02:00
|
|
|
/*
|
|
|
|
* If multiple relations are partitioned the same way, all such partitions
|
|
|
|
* will have a pointer to the same PartitionScheme. A list of PartitionScheme
|
|
|
|
* objects is attached to the PlannerInfo. By design, the partition scheme
|
|
|
|
* incorporates only the general properties of the partition method (LIST vs.
|
|
|
|
* RANGE, number of partitioning columns and the type information for each)
|
|
|
|
* and not the specific bounds.
|
|
|
|
*
|
|
|
|
* We store the opclass-declared input data types instead of the partition key
|
|
|
|
* datatypes since the former rather than the latter are used to compare
|
|
|
|
* partition bounds. Since partition key data types and the opclass declared
|
|
|
|
* input data types are expected to be binary compatible (per ResolveOpClass),
|
|
|
|
* both of those should have same byval and length properties.
|
|
|
|
*/
|
|
|
|
typedef struct PartitionSchemeData
|
|
|
|
{
|
|
|
|
char strategy; /* partition strategy */
|
|
|
|
int16 partnatts; /* number of partition attributes */
|
|
|
|
Oid *partopfamily; /* OIDs of operator families */
|
|
|
|
Oid *partopcintype; /* OIDs of opclass declared input data types */
|
2018-02-28 18:16:09 +01:00
|
|
|
Oid *partcollation; /* OIDs of partitioning collations */
|
2017-09-21 05:33:04 +02:00
|
|
|
|
|
|
|
/* Cached information about partition key data types. */
|
|
|
|
int16 *parttyplen;
|
|
|
|
bool *parttypbyval;
|
Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning. The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.
At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.
This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.
Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
2018-04-06 21:23:04 +02:00
|
|
|
|
|
|
|
/* Cached information about partition comparison functions. */
|
2019-08-16 19:35:31 +02:00
|
|
|
struct FmgrInfo *partsupfunc;
|
2017-09-21 05:33:04 +02:00
|
|
|
} PartitionSchemeData;
|
|
|
|
|
|
|
|
typedef struct PartitionSchemeData *PartitionScheme;
|
2007-04-21 23:01:45 +02:00
|
|
|
|
2000-09-29 20:21:41 +02:00
|
|
|
/*----------
|
1998-07-18 06:22:52 +02:00
|
|
|
* RelOptInfo
|
1999-08-16 04:17:58 +02:00
|
|
|
* Per-relation information for planning/optimization
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
2002-03-12 01:52:10 +01:00
|
|
|
* For planning purposes, a "base rel" is either a plain relation (a table)
|
2003-01-15 20:35:48 +01:00
|
|
|
* or the output of a sub-SELECT or function that appears in the range table.
|
2002-03-12 01:52:10 +01:00
|
|
|
* In either case it is uniquely identified by an RT index. A "joinrel"
|
|
|
|
* is the joining of two or more base rels. A joinrel is identified by
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* the set of RT indexes for its component baserels, along with RT indexes
|
|
|
|
* for any outer joins it has computed. We create RelOptInfo nodes for each
|
|
|
|
* baserel and joinrel, and store them in the PlannerInfo's simple_rel_array
|
|
|
|
* and join_rel_list respectively.
|
2000-09-29 20:21:41 +02:00
|
|
|
*
|
2002-03-12 01:52:10 +01:00
|
|
|
* Note that there is only one joinrel for any given set of component
|
|
|
|
* baserels, no matter what order we assemble them in; so an unordered
|
|
|
|
* set is the right datatype to identify it with.
|
2000-09-29 20:21:41 +02:00
|
|
|
*
|
2002-03-12 01:52:10 +01:00
|
|
|
* We also have "other rels", which are like base rels in that they refer to
|
2005-06-06 06:13:36 +02:00
|
|
|
* single RT indexes; but they are not part of the join tree, and are given
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
* a different RelOptKind to identify them.
|
2006-01-31 22:39:25 +01:00
|
|
|
* Currently the only kind of otherrels are those made for member relations
|
|
|
|
* of an "append relation", that is an inheritance set or UNION ALL subquery.
|
|
|
|
* An append relation has a parent RTE that is a base rel, which represents
|
|
|
|
* the entire append relation. The member RTEs are otherrels. The parent
|
|
|
|
* is present in the query join tree but the members are not. The member
|
|
|
|
* RTEs and otherrels are used to plan the scans of the individual tables or
|
2010-10-14 22:56:39 +02:00
|
|
|
* subqueries of the append set; then the parent baserel is given Append
|
|
|
|
* and/or MergeAppend paths comprising the best paths for the individual
|
|
|
|
* member rels. (See comments for AppendRelInfo for more information.)
|
2002-03-12 01:52:10 +01:00
|
|
|
*
|
2003-01-15 20:35:48 +01:00
|
|
|
* At one time we also made otherrels to represent join RTEs, for use in
|
|
|
|
* handling join alias Vars. Currently this is not needed because all join
|
|
|
|
* alias Vars are expanded to non-aliased form during preprocess_expression.
|
|
|
|
*
|
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety we can, if
it is an equi-join on the partition keys, join the matching partitions
individually. This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels. This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation. In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side. All the same, for now,
turn this feature off by default.
Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical. It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.
Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me. A few final adjustments by me.
Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
|
|
|
* We also have relations representing joins between child relations of
|
|
|
|
* different partitioned tables. These relations are not added to
|
|
|
|
* join_rel_level lists as they are not joined directly by the dynamic
|
|
|
|
* programming algorithm.
|
|
|
|
*
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
* There is also a RelOptKind for "upper" relations, which are RelOptInfos
|
|
|
|
* that describe post-scan/join processing steps, such as aggregation.
|
|
|
|
* Many of the fields in these RelOptInfos are meaningless, but their Path
|
|
|
|
* fields always hold Paths showing ways to do that processing step.
|
|
|
|
*
|
2002-03-12 01:52:10 +01:00
|
|
|
* Parts of this data structure are specific to various scan and join
|
|
|
|
* mechanisms. It didn't seem worth creating new node types for them.
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* relids - Set of relation identifiers (RT indexes). This is a base
|
|
|
|
* relation if there is just one, a join relation if more;
|
|
|
|
* in the join case, RT indexes of any outer joins formed
|
|
|
|
* at or below this join are included along with baserels
|
2000-01-09 01:26:47 +01:00
|
|
|
* rows - estimated number of tuples in the relation after restriction
|
|
|
|
* clauses have been applied (ie, output rows of a plan for it)
|
Fix planner's cost estimation for SEMI/ANTI joins with inner indexscans.
When the inner side of a nestloop SEMI or ANTI join is an indexscan that
uses all the join clauses as indexquals, it can be presumed that both
matched and unmatched outer rows will be processed very quickly: for
matched rows, we'll stop after fetching one row from the indexscan, while
for unmatched rows we'll have an indexscan that finds no matching index
entries, which should also be quick. The planner already knew about this,
but it was nonetheless charging for at least one full run of the inner
indexscan, as a consequence of concerns about the behavior of materialized
inner scans --- but those concerns don't apply in the fast case. If the
inner side has low cardinality (many matching rows) this could make an
indexscan plan look far more expensive than it actually is. To fix,
rearrange the work in initial_cost_nestloop/final_cost_nestloop so that we
don't add the inner scan cost until we've inspected the indexquals, and
then we can add either the full-run cost or just the first tuple's cost as
appropriate.
Experimentation with this fix uncovered another problem: add_path and
friends were coded to disregard cheap startup cost when considering
parameterized paths. That's usually okay (and desirable, because it thins
the path herd faster); but in this fast case for SEMI/ANTI joins, it could
result in throwing away the desired plain indexscan path in favor of a
bitmap scan path before we ever get to the join costing logic. In the
many-matching-rows cases of interest here, a bitmap scan will do a lot more
work than required, so this is a problem. To fix, add a per-relation flag
consider_param_startup that works like the existing consider_startup flag,
but applies to parameterized paths, and set it for relations that are the
inside of a SEMI or ANTI join.
To make this patch reasonably safe to back-patch, care has been taken to
avoid changing the planner's behavior except in the very narrow case of
SEMI/ANTI joins with inner indexscans. There are places in
compare_path_costs_fuzzily and add_path_precheck that are not terribly
consistent with the new approach, but changing them will affect planner
decisions at the margins in other cases, so we'll leave that for a
HEAD-only fix.
Back-patch to 9.3; before that, the consider_startup flag didn't exist,
meaning that the second aspect of the patch would be too invasive.
Per a complaint from Peter Holzer and analysis by Tomas Vondra.
2015-06-03 17:58:47 +02:00
|
|
|
* consider_startup - true if there is any value in keeping plain paths for
|
2012-09-02 00:16:24 +02:00
|
|
|
* this rel on the basis of having cheap startup cost
|
Fix planner's cost estimation for SEMI/ANTI joins with inner indexscans.
When the inner side of a nestloop SEMI or ANTI join is an indexscan that
uses all the join clauses as indexquals, it can be presumed that both
matched and unmatched outer rows will be processed very quickly: for
matched rows, we'll stop after fetching one row from the indexscan, while
for unmatched rows we'll have an indexscan that finds no matching index
entries, which should also be quick. The planner already knew about this,
but it was nonetheless charging for at least one full run of the inner
indexscan, as a consequence of concerns about the behavior of materialized
inner scans --- but those concerns don't apply in the fast case. If the
inner side has low cardinality (many matching rows) this could make an
indexscan plan look far more expensive than it actually is. To fix,
rearrange the work in initial_cost_nestloop/final_cost_nestloop so that we
don't add the inner scan cost until we've inspected the indexquals, and
then we can add either the full-run cost or just the first tuple's cost as
appropriate.
Experimentation with this fix uncovered another problem: add_path and
friends were coded to disregard cheap startup cost when considering
parameterized paths. That's usually okay (and desirable, because it thins
the path herd faster); but in this fast case for SEMI/ANTI joins, it could
result in throwing away the desired plain indexscan path in favor of a
bitmap scan path before we ever get to the join costing logic. In the
many-matching-rows cases of interest here, a bitmap scan will do a lot more
work than required, so this is a problem. To fix, add a per-relation flag
consider_param_startup that works like the existing consider_startup flag,
but applies to parameterized paths, and set it for relations that are the
inside of a SEMI or ANTI join.
To make this patch reasonably safe to back-patch, care has been taken to
avoid changing the planner's behavior except in the very narrow case of
SEMI/ANTI joins with inner indexscans. There are places in
compare_path_costs_fuzzily and add_path_precheck that are not terribly
consistent with the new approach, but changing them will affect planner
decisions at the margins in other cases, so we'll leave that for a
HEAD-only fix.
Back-patch to 9.3; before that, the consider_startup flag didn't exist,
meaning that the second aspect of the patch would be too invasive.
Per a complaint from Peter Holzer and analysis by Tomas Vondra.
2015-06-03 17:58:47 +02:00
|
|
|
* consider_param_startup - the same for parameterized paths
|
Add an explicit representation of the output targetlist to Paths.
Up to now, there's been an assumption that all Paths for a given relation
compute the same output column set (targetlist). However, there are good
reasons to remove that assumption. For example, an indexscan on an
expression index might be able to return the value of an expensive function
"for free". While we have the ability to generate such a plan today in
simple cases, we don't have a way to model that it's cheaper than a plan
that computes the function from scratch, nor a way to create such a plan
in join cases (where the function computation would normally happen at
the topmost join node). Also, we need this so that we can have Paths
representing post-scan/join steps, where the targetlist may well change
from one step to the next. Therefore, invent a "struct PathTarget"
representing the columns we expect a plan step to emit. It's convenient
to include the output tuple width and tlist evaluation cost in this struct,
and there will likely be additional fields in future.
While Path nodes that actually do have custom outputs will need their own
PathTargets, it will still be true that most Paths for a given relation
will compute the same tlist. To reduce the overhead added by this patch,
keep a "default PathTarget" in RelOptInfo, and allow Paths that compute
that column set to just point to their parent RelOptInfo's reltarget.
(In the patch as committed, actually every Path is like that, since we
do not yet have any cases of custom PathTargets.)
I took this opportunity to provide some more-honest costing of
PlaceHolderVar evaluation. Up to now, the assumption that "scan/join
reltargetlists have cost zero" was applied not only to Vars, where it's
reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval
cost of a PlaceHolderVar's expression to the first plan level where it can
be computed, by including it in the PathTarget cost field and adding that
to the cost estimates for Paths. This isn't perfect yet but it's much
better than before, and there is a way forward to improve it more. This
costing change affects the join order chosen for a couple of the regression
tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
|
|
|
* reltarget - Default Path output tlist for this rel; normally contains
|
|
|
|
* Var and PlaceHolderVar nodes for the values we need to
|
|
|
|
* output from this relation.
|
|
|
|
* List is in no particular order, but all rels of an
|
|
|
|
* appendrel set must use corresponding orders.
|
|
|
|
* NOTE: in an appendrel child relation, may contain
|
|
|
|
* arbitrary expressions pulled up from a subquery!
|
1999-08-16 04:17:58 +02:00
|
|
|
* pathlist - List of Path nodes, one for each potentially useful
|
|
|
|
* method of generating the relation
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
* ppilist - ParamPathInfo nodes for parameterized Paths, if any
|
2000-02-15 21:49:31 +01:00
|
|
|
* cheapest_startup_path - the pathlist member with lowest startup cost
|
Adjust definition of cheapest_total_path to work better with LATERAL.
In the initial cut at LATERAL, I kept the rule that cheapest_total_path
was always unparameterized, which meant it had to be NULL if the relation
has no unparameterized paths. It turns out to work much more nicely if
we always have *some* path nominated as cheapest-total for each relation.
In particular, let's still say it's the cheapest unparameterized path if
there is one; if not, take the cheapest-total-cost path among those of
the minimum available parameterization. (The first rule is actually
a special case of the second.)
This allows reversion of some temporary lobotomizations I'd put in place.
In particular, the planner can now consider hash and merge joins for
joins below a parameter-supplying nestloop, even if there aren't any
unparameterized paths available. This should bring planning of
LATERAL-containing queries to the same level as queries not using that
feature.
Along the way, simplify management of parameterized paths in add_path()
and friends. In the original coding for parameterized paths in 9.2,
I tried to minimize the logic changes in add_path(), so it just treated
parameterization as yet another dimension of comparison for paths.
We later made it ignore pathkeys (sort ordering) of parameterized paths,
on the grounds that ordering isn't a useful property for the path on the
inside of a nestloop, so we might as well get rid of useless parameterized
paths as quickly as possible. But we didn't take that reasoning as far as
we should have. Startup cost isn't a useful property inside a nestloop
either, so add_path() ought to discount startup cost of parameterized paths
as well. Having done that, the secondary sorting I'd implemented (in
add_parameterized_path) is no longer needed --- any parameterized path that
survives add_path() at all is worth considering at higher levels. So this
should be a bit faster as well as simpler.
2012-08-30 04:05:27 +02:00
|
|
|
* (regardless of ordering) among the unparameterized paths;
|
|
|
|
* or NULL if there is no unparameterized path
|
2000-02-15 21:49:31 +01:00
|
|
|
* cheapest_total_path - the pathlist member with lowest total cost
|
Adjust definition of cheapest_total_path to work better with LATERAL.
In the initial cut at LATERAL, I kept the rule that cheapest_total_path
was always unparameterized, which meant it had to be NULL if the relation
has no unparameterized paths. It turns out to work much more nicely if
we always have *some* path nominated as cheapest-total for each relation.
In particular, let's still say it's the cheapest unparameterized path if
there is one; if not, take the cheapest-total-cost path among those of
the minimum available parameterization. (The first rule is actually
a special case of the second.)
This allows reversion of some temporary lobotomizations I'd put in place.
In particular, the planner can now consider hash and merge joins for
joins below a parameter-supplying nestloop, even if there aren't any
unparameterized paths available. This should bring planning of
LATERAL-containing queries to the same level as queries not using that
feature.
Along the way, simplify management of parameterized paths in add_path()
and friends. In the original coding for parameterized paths in 9.2,
I tried to minimize the logic changes in add_path(), so it just treated
parameterization as yet another dimension of comparison for paths.
We later made it ignore pathkeys (sort ordering) of parameterized paths,
on the grounds that ordering isn't a useful property for the path on the
inside of a nestloop, so we might as well get rid of useless parameterized
paths as quickly as possible. But we didn't take that reasoning as far as
we should have. Startup cost isn't a useful property inside a nestloop
either, so add_path() ought to discount startup cost of parameterized paths
as well. Having done that, the secondary sorting I'd implemented (in
add_parameterized_path) is no longer needed --- any parameterized path that
survives add_path() at all is worth considering at higher levels. So this
should be a bit faster as well as simpler.
2012-08-30 04:05:27 +02:00
|
|
|
* (regardless of ordering) among the unparameterized paths;
|
|
|
|
* or if there is no unparameterized path, the path with lowest
|
|
|
|
* total cost among the paths with minimum parameterization
|
2003-01-20 19:55:07 +01:00
|
|
|
* cheapest_unique_path - for caching cheapest path to produce unique
|
Adjust definition of cheapest_total_path to work better with LATERAL.
In the initial cut at LATERAL, I kept the rule that cheapest_total_path
was always unparameterized, which meant it had to be NULL if the relation
has no unparameterized paths. It turns out to work much more nicely if
we always have *some* path nominated as cheapest-total for each relation.
In particular, let's still say it's the cheapest unparameterized path if
there is one; if not, take the cheapest-total-cost path among those of
the minimum available parameterization. (The first rule is actually
a special case of the second.)
This allows reversion of some temporary lobotomizations I'd put in place.
In particular, the planner can now consider hash and merge joins for
joins below a parameter-supplying nestloop, even if there aren't any
unparameterized paths available. This should bring planning of
LATERAL-containing queries to the same level as queries not using that
feature.
Along the way, simplify management of parameterized paths in add_path()
and friends. In the original coding for parameterized paths in 9.2,
I tried to minimize the logic changes in add_path(), so it just treated
parameterization as yet another dimension of comparison for paths.
We later made it ignore pathkeys (sort ordering) of parameterized paths,
on the grounds that ordering isn't a useful property for the path on the
inside of a nestloop, so we might as well get rid of useless parameterized
paths as quickly as possible. But we didn't take that reasoning as far as
we should have. Startup cost isn't a useful property inside a nestloop
either, so add_path() ought to discount startup cost of parameterized paths
as well. Having done that, the secondary sorting I'd implemented (in
add_parameterized_path) is no longer needed --- any parameterized path that
survives add_path() at all is worth considering at higher levels. So this
should be a bit faster as well as simpler.
2012-08-30 04:05:27 +02:00
|
|
|
* (no duplicates) output from relation; NULL if not yet requested
|
|
|
|
* cheapest_parameterized_paths - best paths for their parameterizations;
|
|
|
|
* always includes cheapest_total_path, even if that's unparameterized
|
2015-12-11 21:52:16 +01:00
|
|
|
* direct_lateral_relids - rels this rel has direct LATERAL references to
|
2015-12-08 00:56:14 +01:00
|
|
|
* lateral_relids - required outer rels for LATERAL, as a Relids set
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
* (includes both direct and indirect lateral references)
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
2002-03-12 01:52:10 +01:00
|
|
|
* If the relation is a base relation it will have these fields set:
|
1999-08-16 04:17:58 +02:00
|
|
|
*
|
2003-02-08 21:20:55 +01:00
|
|
|
* relid - RTE index (this is redundant with the relids field, but
|
|
|
|
* is provided for convenience of access)
|
Make the planner assume that the entries in a VALUES list are distinct.
Previously, if we had to estimate the number of distinct values in a
VALUES column, we fell back on the default behavior used whenever we lack
statistics, which effectively is that there are Min(# of entries, 200)
distinct values. This can be very badly off with a large VALUES list,
as noted by Jeff Janes.
We could consider actually running an ANALYZE-like scan on the VALUES,
but that seems unduly expensive, and anyway it could not deliver reliable
info if the entries are not all constants. What seems like a better choice
is to assume that the values are all distinct. This will sometimes be just
as wrong as the old code, but it seems more likely to be more nearly right
in many common cases. Also, it is more consistent with what happens in
some related cases, for example WHERE x = ANY(ARRAY[1,2,3,...,n]) and
WHERE x = ANY(VALUES (1),(2),(3),...,(n)) now are estimated similarly.
This was discussed some time ago, but consensus was it'd be better
to slip it in at the start of a development cycle not near the end.
(It should've gone into v10, really, but I forgot about it.)
Discussion: https://postgr.es/m/CAMkU=1xHkyPa8VQgGcCNg3RMFFvVxUdOpus1gKcFuvVi0w6Acg@mail.gmail.com
2017-08-16 21:37:14 +02:00
|
|
|
* rtekind - copy of RTE's rtekind field
|
2003-06-30 01:05:05 +02:00
|
|
|
* min_attr, max_attr - range of valid AttrNumbers for rel
|
|
|
|
* attr_needed - array of bitmapsets indicating the highest joinrel
|
|
|
|
* in which each attribute is needed; if bit 0 is set then
|
|
|
|
* the attribute is needed as part of final targetlist
|
|
|
|
* attr_widths - cache space for per-attribute width estimates;
|
|
|
|
* zero means not computed yet
|
Do assorted mop-up in the planner.
Remove RestrictInfo.nullable_relids, along with a good deal of
infrastructure that calculated it. One use-case for it was in
join_clause_is_movable_to, but we can now replace that usage with
a check to see if the clause's relids include any outer join
that can null the target relation. The other use-case was in
join_clause_is_movable_into, but that test can just be dropped
entirely now that the clause's relids include outer joins.
Furthermore, join_clause_is_movable_into should now be
accurate enough that it will accept anything returned by
generate_join_implied_equalities, so we can restore the Assert
that was diked out in commit 95f4e59c3.
Remove the outerjoin_delayed mechanism. We needed this before to
prevent quals from getting evaluated below outer joins that should
null some of their vars. Now that we consider varnullingrels while
placing quals, that's taken care of automatically, so throw the
whole thing away.
Teach remove_useless_result_rtes to also remove useless FromExprs.
Having done that, the delay_upper_joins flag serves no purpose any
more and we can remove it, largely reverting 11086f2f2.
Use constant TRUE for "dummy" clauses when throwing back outer joins.
This improves on a hack I introduced in commit 6a6522529. If we
have a left-join clause l.x = r.y, and a WHERE clause l.x = constant,
we generate r.y = constant and then don't really have a need for the
join clause. But we must throw the join clause back anyway after
marking it redundant, so that the join search heuristics won't think
this is a clauseless join and avoid it. That was a kluge introduced
under time pressure, and after looking at it I thought of a better
way: let's just introduce constant-TRUE "join clauses" instead,
and get rid of them at the end. This improves the generated plans for
such cases by not having to test a redundant join clause. We can also
get rid of the ugly hack used to mark such clauses as redundant for
selectivity estimation.
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
|
|
|
* nulling_relids - relids of outer joins that can null this rel
|
2012-08-27 04:48:55 +02:00
|
|
|
* lateral_vars - lateral cross-references of rel, if any (list of
|
|
|
|
* Vars and PlaceHolderVars)
|
2013-08-18 02:22:37 +02:00
|
|
|
* lateral_referencers - relids of rels that reference this one laterally
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
* (includes both direct and indirect lateral references)
|
2001-05-20 22:28:20 +02:00
|
|
|
* indexlist - list of IndexOptInfo nodes for relation's indexes
|
2023-01-09 05:15:08 +01:00
|
|
|
* (always NIL if it's not a table or partitioned table)
|
2002-05-12 22:10:05 +02:00
|
|
|
* pages - number of disk pages in relation (zero if not a table)
|
2000-01-09 01:26:47 +01:00
|
|
|
* tuples - number of tuples in relation (not considering restrictions)
|
2011-10-14 23:23:01 +02:00
|
|
|
* allvisfrac - fraction of disk pages that are marked all-visible
|
Speed up finding EquivalenceClasses for a given set of rels
Previously in order to determine which ECs a relation had members in, we
had to loop over all ECs stored in PlannerInfo's eq_classes and check if
ec_relids mentioned the relation. For the most part, this was fine, as
generally, unless queries were fairly complex, the overhead of performing
the lookup would have not been that significant. However, when queries
contained large numbers of joins and ECs, the overhead to find the set of
classes matching a given set of relations could become a significant
portion of the overall planning effort.
Here we allow a much more efficient method to access the ECs which match a
given relation or set of relations. A new Bitmapset field in RelOptInfo
now exists to store the indexes into PlannerInfo's eq_classes list which
each relation is mentioned in. This allows very fast lookups to find all
ECs belonging to a single relation. When we need to lookup ECs belonging
to a given pair of relations, we can simply bitwise-AND the Bitmapsets from
each relation and use the result to perform the lookup.
We also take the opportunity to write a new implementation of
generate_join_implied_equalities which makes use of the new indexes.
generate_join_implied_equalities_for_ecs must remain as is as it can be
given a custom list of ECs, which we can't easily determine the indexes of.
This was originally intended to fix the performance penalty of looking up
foreign keys matching a join condition which was introduced by 100340e2d.
However, we're speeding up much more than just that here.
Author: David Rowley, Tom Lane
Reviewed-by: Tom Lane, Tomas Vondra
Discussion: https://postgr.es/m/6970.1545327857@sss.pgh.pa.us
2019-07-21 07:30:58 +02:00
|
|
|
* eclass_indexes - EquivalenceClasses that mention this rel (filled
|
|
|
|
* only after EC merging is complete)
|
2011-09-03 21:35:12 +02:00
|
|
|
* subroot - PlannerInfo for subquery (NULL if it's not a subquery)
|
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.
The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries. This was (probably) safe before the introduction of
CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE. In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.
To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter. The planning code is
simpler and probably faster than before, as well as being more correct.
Per report from Vik Reykja.
These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
|
|
|
* subplan_params - list of PlannerParamItems to be passed to subquery
|
2000-09-29 20:21:41 +02:00
|
|
|
*
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
* Note: for a subquery, tuples and subroot are not set immediately
|
2000-09-29 20:21:41 +02:00
|
|
|
* upon creation of the RelOptInfo object; they are filled in when
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
* set_subquery_pathlist processes the object.
|
1999-08-16 04:17:58 +02:00
|
|
|
*
|
2006-01-31 22:39:25 +01:00
|
|
|
* For otherrels that are appendrel members, these fields are filled
|
2015-12-08 00:56:14 +01:00
|
|
|
* in just as for a baserel, except we don't bother with lateral_vars.
|
2002-03-12 01:52:10 +01:00
|
|
|
*
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
* If the relation is either a foreign table or a join of foreign tables that
|
Avoid invalidating all foreign-join cached plans when user mappings change.
We must not push down a foreign join when the foreign tables involved
should be accessed under different user mappings. Previously we tried
to enforce that rule literally during planning, but that meant that the
resulting plans were dependent on the current contents of the
pg_user_mapping catalog, and we had to blow away all cached plans
containing any remote join when anything at all changed in pg_user_mapping.
This could have been improved somewhat, but the fact that a syscache inval
callback has very limited info about what changed made it hard to do better
within that design. Instead, let's change the planner to not consider user
mappings per se, but to allow a foreign join if both RTEs have the same
checkAsUser value. If they do, then they necessarily will use the same
user mapping at runtime, and we don't need to know specifically which one
that is. Post-plan-time changes in pg_user_mapping no longer require any
plan invalidation.
This rule does give up some optimization ability, to wit where two foreign
table references come from views with different owners or one's from a view
and one's directly in the query, but nonetheless the same user mapping
would have applied. We'll sacrifice the first case, but to not regress
more than we have to in the second case, allow a foreign join involving
both zero and nonzero checkAsUser values if the nonzero one is the same as
the prevailing effective userID. In that case, mark the plan as only
runnable by that userID.
The plancache code already had a notion of plans being userID-specific,
in order to support RLS. It was a little confused though, in particular
lacking clarity of thought as to whether it was the rewritten query or just
the finished plan that's dependent on the userID. Rearrange that code so
that it's clearer what depends on which, and so that the same logic applies
to both RLS-injected role dependency and foreign-join-injected role
dependency.
Note that this patch doesn't remove the other issue mentioned in the
original complaint, which is that while we'll reliably stop using a foreign
join if it's disallowed in a new context, we might fail to start using a
foreign join if it's now allowed, but we previously created a generic
cached plan that didn't use one. It was agreed that the chance of winning
that way was not high enough to justify the much larger number of plan
invalidations that would have to occur if we tried to cause it to happen.
In passing, clean up randomly-varying spelling of EXPLAIN commands in
postgres_fdw.sql, and fix a COSTS ON example that had been allowed to
leak into the committed tests.
This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the
previous attempt at ensuring we wouldn't push down foreign joins that
span permissions contexts.
Etsuro Fujita and Tom Lane
Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
|
|
|
* all belong to the same foreign server and are assigned to the same user to
|
|
|
|
* check access permissions as (cf checkAsUser), these fields will be set:
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
*
|
|
|
|
* serverid - OID of foreign server, if foreign table (else InvalidOid)
|
Avoid invalidating all foreign-join cached plans when user mappings change.
We must not push down a foreign join when the foreign tables involved
should be accessed under different user mappings. Previously we tried
to enforce that rule literally during planning, but that meant that the
resulting plans were dependent on the current contents of the
pg_user_mapping catalog, and we had to blow away all cached plans
containing any remote join when anything at all changed in pg_user_mapping.
This could have been improved somewhat, but the fact that a syscache inval
callback has very limited info about what changed made it hard to do better
within that design. Instead, let's change the planner to not consider user
mappings per se, but to allow a foreign join if both RTEs have the same
checkAsUser value. If they do, then they necessarily will use the same
user mapping at runtime, and we don't need to know specifically which one
that is. Post-plan-time changes in pg_user_mapping no longer require any
plan invalidation.
This rule does give up some optimization ability, to wit where two foreign
table references come from views with different owners or one's from a view
and one's directly in the query, but nonetheless the same user mapping
would have applied. We'll sacrifice the first case, but to not regress
more than we have to in the second case, allow a foreign join involving
both zero and nonzero checkAsUser values if the nonzero one is the same as
the prevailing effective userID. In that case, mark the plan as only
runnable by that userID.
The plancache code already had a notion of plans being userID-specific,
in order to support RLS. It was a little confused though, in particular
lacking clarity of thought as to whether it was the rewritten query or just
the finished plan that's dependent on the userID. Rearrange that code so
that it's clearer what depends on which, and so that the same logic applies
to both RLS-injected role dependency and foreign-join-injected role
dependency.
Note that this patch doesn't remove the other issue mentioned in the
original complaint, which is that while we'll reliably stop using a foreign
join if it's disallowed in a new context, we might fail to start using a
foreign join if it's now allowed, but we previously created a generic
cached plan that didn't use one. It was agreed that the chance of winning
that way was not high enough to justify the much larger number of plan
invalidations that would have to occur if we tried to cause it to happen.
In passing, clean up randomly-varying spelling of EXPLAIN commands in
postgres_fdw.sql, and fix a COSTS ON example that had been allowed to
leak into the committed tests.
This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the
previous attempt at ensuring we wouldn't push down foreign joins that
span permissions contexts.
Etsuro Fujita and Tom Lane
Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
|
|
|
* userid - OID of user to check access as (InvalidOid means current user)
|
|
|
|
* useridiscurrent - we've assumed that userid equals current user
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
* fdwroutine - function hooks for FDW, if foreign table (else NULL)
|
|
|
|
* fdw_private - private state for FDW, if foreign table (else NULL)
|
|
|
|
*
|
2017-04-08 04:20:03 +02:00
|
|
|
* Two fields are used to cache knowledge acquired during the join search
|
|
|
|
* about whether this rel is provably unique when being joined to given other
|
|
|
|
* relation(s), ie, it can have at most one row matching any given row from
|
|
|
|
* that join relation. Currently we only attempt such proofs, and thus only
|
|
|
|
* populate these fields, for base rels; but someday they might be used for
|
|
|
|
* join rels too:
|
|
|
|
*
|
|
|
|
* unique_for_rels - list of Relid sets, each one being a set of other
|
|
|
|
* rels for which this one has been proven unique
|
|
|
|
* non_unique_for_rels - list of Relid sets, each one being a set of
|
|
|
|
* other rels for which we have tried and failed to prove
|
|
|
|
* this one unique
|
|
|
|
*
|
2017-09-21 05:33:04 +02:00
|
|
|
* The presence of the following fields depends on the restrictions
|
2002-03-12 01:52:10 +01:00
|
|
|
* and joins that the relation participates in:
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
2000-02-07 05:41:04 +01:00
|
|
|
* baserestrictinfo - List of RestrictInfo nodes, containing info about
|
2005-06-09 06:19:00 +02:00
|
|
|
* each non-join qualification clause in which this relation
|
2000-02-07 05:41:04 +01:00
|
|
|
* participates (only used for base rels)
|
2000-02-15 21:49:31 +01:00
|
|
|
* baserestrictcost - Estimated cost of evaluating the baserestrictinfo
|
|
|
|
* clauses at a single tuple (only used for base rels)
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
* baserestrict_min_security - Smallest security_level found among
|
|
|
|
* clauses in baserestrictinfo
|
2005-06-09 06:19:00 +02:00
|
|
|
* joininfo - List of RestrictInfo nodes, containing info about each
|
2007-01-20 21:45:41 +01:00
|
|
|
* join clause in which this relation participates (but
|
|
|
|
* note this excludes clauses that might be derivable from
|
|
|
|
* EquivalenceClasses)
|
|
|
|
* has_eclass_joins - flag that EquivalenceClass joins are possible
|
2000-02-07 05:41:04 +01:00
|
|
|
*
|
|
|
|
* Note: Keeping a restrictinfo list in the RelOptInfo is useful only for
|
|
|
|
* base rels, because for a join rel the set of clauses that are treated as
|
|
|
|
* restrict clauses varies depending on which sub-relations we choose to join.
|
|
|
|
* (For example, in a 3-base-rel join, a clause relating rels 1 and 2 must be
|
|
|
|
* treated as a restrictclause if we join {1} and {2 3} to make {1 2 3}; but
|
|
|
|
* if we join {1 2} and {3} then that clause will be a restrictclause in {1 2}
|
|
|
|
* and should not be processed again at the level of {1 2 3}.) Therefore,
|
|
|
|
* the restrictinfo list in the join case appears in individual JoinPaths
|
|
|
|
* (field joinrestrictinfo), not in the parent relation. But it's OK for
|
2005-06-09 06:19:00 +02:00
|
|
|
* the RelOptInfo to store the joininfo list, because that is the same
|
2000-02-07 05:41:04 +01:00
|
|
|
* for a given rel no matter how we form it.
|
2000-02-15 21:49:31 +01:00
|
|
|
*
|
|
|
|
* We store baserestrictcost in the RelOptInfo (for base relations) because
|
|
|
|
* we know we will need it at least once (to price the sequential scan)
|
|
|
|
* and may need it multiple times to price index scans.
|
2017-09-21 05:33:04 +02:00
|
|
|
*
|
2020-04-03 23:00:25 +02:00
|
|
|
* A join relation is considered to be partitioned if it is formed from a
|
|
|
|
* join of two relations that are partitioned, have matching partitioning
|
|
|
|
* schemes, and are joined on an equijoin of the partitioning columns.
|
|
|
|
* Under those conditions we can consider the join relation to be partitioned
|
|
|
|
* by either relation's partitioning keys, though some care is needed if
|
|
|
|
* either relation can be forced to null by outer-joining. For example, an
|
|
|
|
* outer join like (A LEFT JOIN B ON A.a = B.b) may produce rows with B.b
|
|
|
|
* NULL. These rows may not fit the partitioning conditions imposed on B.
|
|
|
|
* Hence, strictly speaking, the join is not partitioned by B.b and thus
|
|
|
|
* partition keys of an outer join should include partition key expressions
|
|
|
|
* from the non-nullable side only. However, if a subsequent join uses
|
|
|
|
* strict comparison operators (and all commonly-used equijoin operators are
|
|
|
|
* strict), the presence of nulls doesn't cause a problem: such rows couldn't
|
|
|
|
* match anything on the other side and thus they don't create a need to do
|
|
|
|
* any cross-partition sub-joins. Hence we can treat such values as still
|
|
|
|
* partitioning the join output for the purpose of additional partitionwise
|
|
|
|
* joining, so long as a strict join operator is used by the next join.
|
|
|
|
*
|
2017-09-21 05:33:04 +02:00
|
|
|
* If the relation is partitioned, these fields will be set:
|
|
|
|
*
|
Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning. The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.
At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.
This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.
Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
2018-04-06 21:23:04 +02:00
|
|
|
* part_scheme - Partitioning scheme of the relation
|
|
|
|
* nparts - Number of partitions
|
|
|
|
* boundinfo - Partition bounds
|
Allow partitionwise joins in more cases.
Previously, the partitionwise join technique only allowed partitionwise
join when input partitioned tables had exactly the same partition
bounds. This commit extends the technique to some cases when the tables
have different partition bounds, by using an advanced partition-matching
algorithm introduced by this commit. For both the input partitioned
tables, the algorithm checks whether every partition of one input
partitioned table only matches one partition of the other input
partitioned table at most, and vice versa. In such a case the join
between the tables can be broken down into joins between the matching
partitions, so the algorithm produces the pairs of the matching
partitions, plus the partition bounds for the join relation, to allow
partitionwise join for computing the join. Currently, the algorithm
works for list-partitioned and range-partitioned tables, but not
hash-partitioned tables. See comments in partition_bounds_merge().
Ashutosh Bapat and Etsuro Fujita, most of regression tests by Rajkumar
Raghuwanshi, some of the tests by Mark Dilger and Amul Sul, reviewed by
Dmitry Dolgov and Amul Sul, with additional review at various points by
Ashutosh Bapat, Mark Dilger, Robert Haas, Antonin Houska, Amit Langote,
Justin Pryzby, and Tomas Vondra
Discussion: https://postgr.es/m/CAFjFpRdjQvaUEV5DJX3TW6pU5eq54NCkadtxHX2JiJG_GvbrCA@mail.gmail.com
2020-04-08 03:25:00 +02:00
|
|
|
* partbounds_merged - true if partition bounds are merged ones
|
Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning. The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.
At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.
This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.
Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
2018-04-06 21:23:04 +02:00
|
|
|
* partition_qual - Partition constraint if not the root
|
|
|
|
* part_rels - RelOptInfos for each partition
|
Allow partitionwise joins in more cases.
Previously, the partitionwise join technique only allowed partitionwise
join when input partitioned tables had exactly the same partition
bounds. This commit extends the technique to some cases when the tables
have different partition bounds, by using an advanced partition-matching
algorithm introduced by this commit. For both the input partitioned
tables, the algorithm checks whether every partition of one input
partitioned table only matches one partition of the other input
partitioned table at most, and vice versa. In such a case the join
between the tables can be broken down into joins between the matching
partitions, so the algorithm produces the pairs of the matching
partitions, plus the partition bounds for the join relation, to allow
partitionwise join for computing the join. Currently, the algorithm
works for list-partitioned and range-partitioned tables, but not
hash-partitioned tables. See comments in partition_bounds_merge().
Ashutosh Bapat and Etsuro Fujita, most of regression tests by Rajkumar
Raghuwanshi, some of the tests by Mark Dilger and Amul Sul, reviewed by
Dmitry Dolgov and Amul Sul, with additional review at various points by
Ashutosh Bapat, Mark Dilger, Robert Haas, Antonin Houska, Amit Langote,
Justin Pryzby, and Tomas Vondra
Discussion: https://postgr.es/m/CAFjFpRdjQvaUEV5DJX3TW6pU5eq54NCkadtxHX2JiJG_GvbrCA@mail.gmail.com
2020-04-08 03:25:00 +02:00
|
|
|
* all_partrels - Relids set of all partition relids
|
Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning. The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.
At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.
This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.
Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
2018-04-06 21:23:04 +02:00
|
|
|
* partexprs, nullable_partexprs - Partition key expressions
|
2017-09-21 05:33:04 +02:00
|
|
|
*
|
2020-04-03 23:00:25 +02:00
|
|
|
* The partexprs and nullable_partexprs arrays each contain
|
|
|
|
* part_scheme->partnatts elements. Each of the elements is a list of
|
|
|
|
* partition key expressions. For partitioned base relations, there is one
|
|
|
|
* expression in each partexprs element, and nullable_partexprs is empty.
|
|
|
|
* For partitioned join relations, each base relation within the join
|
|
|
|
* contributes one partition key expression per partitioning column;
|
|
|
|
* that expression goes in the partexprs[i] list if the base relation
|
|
|
|
* is not nullable by this join or any lower outer join, or in the
|
|
|
|
* nullable_partexprs[i] list if the base relation is nullable.
|
2020-04-08 04:12:14 +02:00
|
|
|
* Furthermore, FULL JOINs add extra nullable_partexprs expressions
|
|
|
|
* corresponding to COALESCE expressions of the left and right join columns,
|
|
|
|
* to simplify matching join clauses to those lists.
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
*
|
|
|
|
* Not all fields are printed. (In some cases, there is no print support for
|
|
|
|
* the field type.)
|
2000-09-29 20:21:41 +02:00
|
|
|
*----------
|
1996-08-28 03:59:28 +02:00
|
|
|
*/
|
2021-02-27 10:59:36 +01:00
|
|
|
|
|
|
|
/* Bitmask of flags supported by table AMs */
|
|
|
|
#define AMFLAG_HAS_TID_RANGE (1 << 0)
|
|
|
|
|
2002-03-12 01:52:10 +01:00
|
|
|
typedef enum RelOptKind
|
|
|
|
{
|
|
|
|
RELOPT_BASEREL,
|
|
|
|
RELOPT_JOINREL,
|
2010-03-29 00:59:34 +02:00
|
|
|
RELOPT_OTHER_MEMBER_REL,
|
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety we can, if
it is an equi-join on the partition keys, join the matching partitions
individually. This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels. This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation. In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side. All the same, for now,
turn this feature off by default.
Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical. It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.
Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me. A few final adjustments by me.
Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
|
|
|
RELOPT_OTHER_JOINREL,
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
RELOPT_UPPER_REL,
|
2023-02-13 19:35:38 +01:00
|
|
|
RELOPT_OTHER_UPPER_REL
|
2002-03-12 01:52:10 +01:00
|
|
|
} RelOptKind;
|
1996-08-28 03:59:28 +02:00
|
|
|
|
Abstract logic to allow for multiple kinds of child rels.
Currently, the only type of child relation is an "other member rel",
which is the child of a baserel, but in the future joins and even
upper relations may have child rels. To facilitate that, introduce
macros that test to test for particular RelOptKind values, and use
them in various places where they help to clarify the sense of a test.
(For example, a test may allow RELOPT_OTHER_MEMBER_REL either because
it intends to allow child rels, or because it intends to allow simple
rels.)
Also, remove find_childrel_top_parent, which will not work for a
child rel that is not a baserel. Instead, add a new RelOptInfo
member top_parent_relids to track the same kind of information in a
more generic manner.
Ashutosh Bapat, slightly tweaked by me. Review and testing of the
patch set from which this was taken by Rajkumar Raghuwanshi and Rafia
Sabih.
Discussion: http://postgr.es/m/CA+TgmoagTnF2yqR3PT2rv=om=wJiZ4-A+ATwdnriTGku1CLYxA@mail.gmail.com
2017-04-04 04:41:31 +02:00
|
|
|
/*
|
|
|
|
* Is the given relation a simple relation i.e a base or "other" member
|
|
|
|
* relation?
|
|
|
|
*/
|
|
|
|
#define IS_SIMPLE_REL(rel) \
|
|
|
|
((rel)->reloptkind == RELOPT_BASEREL || \
|
|
|
|
(rel)->reloptkind == RELOPT_OTHER_MEMBER_REL)
|
|
|
|
|
|
|
|
/* Is the given relation a join relation? */
|
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety we can, if
it is an equi-join on the partition keys, join the matching partitions
individually. This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels. This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation. In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side. All the same, for now,
turn this feature off by default.
Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical. It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.
Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me. A few final adjustments by me.
Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
|
|
|
#define IS_JOIN_REL(rel) \
|
|
|
|
((rel)->reloptkind == RELOPT_JOINREL || \
|
|
|
|
(rel)->reloptkind == RELOPT_OTHER_JOINREL)
|
Abstract logic to allow for multiple kinds of child rels.
Currently, the only type of child relation is an "other member rel",
which is the child of a baserel, but in the future joins and even
upper relations may have child rels. To facilitate that, introduce
macros that test to test for particular RelOptKind values, and use
them in various places where they help to clarify the sense of a test.
(For example, a test may allow RELOPT_OTHER_MEMBER_REL either because
it intends to allow child rels, or because it intends to allow simple
rels.)
Also, remove find_childrel_top_parent, which will not work for a
child rel that is not a baserel. Instead, add a new RelOptInfo
member top_parent_relids to track the same kind of information in a
more generic manner.
Ashutosh Bapat, slightly tweaked by me. Review and testing of the
patch set from which this was taken by Rajkumar Raghuwanshi and Rafia
Sabih.
Discussion: http://postgr.es/m/CA+TgmoagTnF2yqR3PT2rv=om=wJiZ4-A+ATwdnriTGku1CLYxA@mail.gmail.com
2017-04-04 04:41:31 +02:00
|
|
|
|
|
|
|
/* Is the given relation an upper relation? */
|
Implement partition-wise grouping/aggregation.
If the partition keys of input relation are part of the GROUP BY
clause, all the rows belonging to a given group come from a single
partition. This allows aggregation/grouping over a partitioned
relation to be broken down * into aggregation/grouping on each
partition. This should be no worse, and often better, than the normal
approach.
If the GROUP BY clause does not contain all the partition keys, we can
still perform partial aggregation for each partition and then finalize
aggregation after appending the partial results. This is less certain
to be a win, but it's still useful.
Jeevan Chalke, Ashutosh Bapat, Robert Haas. The larger patch series
of which this patch is a part was also reviewed and tested by Antonin
Houska, Rajkumar Raghuwanshi, David Rowley, Dilip Kumar, Konstantin
Knizhnik, Pascal Legrand, and Rafia Sabih.
Discussion: http://postgr.es/m/CAM2+6=V64_xhstVHie0Rz=KPEQnLJMZt_e314P0jaT_oJ9MR8A@mail.gmail.com
2018-03-22 17:49:48 +01:00
|
|
|
#define IS_UPPER_REL(rel) \
|
|
|
|
((rel)->reloptkind == RELOPT_UPPER_REL || \
|
|
|
|
(rel)->reloptkind == RELOPT_OTHER_UPPER_REL)
|
Abstract logic to allow for multiple kinds of child rels.
Currently, the only type of child relation is an "other member rel",
which is the child of a baserel, but in the future joins and even
upper relations may have child rels. To facilitate that, introduce
macros that test to test for particular RelOptKind values, and use
them in various places where they help to clarify the sense of a test.
(For example, a test may allow RELOPT_OTHER_MEMBER_REL either because
it intends to allow child rels, or because it intends to allow simple
rels.)
Also, remove find_childrel_top_parent, which will not work for a
child rel that is not a baserel. Instead, add a new RelOptInfo
member top_parent_relids to track the same kind of information in a
more generic manner.
Ashutosh Bapat, slightly tweaked by me. Review and testing of the
patch set from which this was taken by Rajkumar Raghuwanshi and Rafia
Sabih.
Discussion: http://postgr.es/m/CA+TgmoagTnF2yqR3PT2rv=om=wJiZ4-A+ATwdnriTGku1CLYxA@mail.gmail.com
2017-04-04 04:41:31 +02:00
|
|
|
|
|
|
|
/* Is the given relation an "other" relation? */
|
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety we can, if
it is an equi-join on the partition keys, join the matching partitions
individually. This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels. This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation. In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side. All the same, for now,
turn this feature off by default.
Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical. It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.
Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me. A few final adjustments by me.
Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
|
|
|
#define IS_OTHER_REL(rel) \
|
|
|
|
((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL || \
|
Implement partition-wise grouping/aggregation.
If the partition keys of input relation are part of the GROUP BY
clause, all the rows belonging to a given group come from a single
partition. This allows aggregation/grouping over a partitioned
relation to be broken down * into aggregation/grouping on each
partition. This should be no worse, and often better, than the normal
approach.
If the GROUP BY clause does not contain all the partition keys, we can
still perform partial aggregation for each partition and then finalize
aggregation after appending the partial results. This is less certain
to be a win, but it's still useful.
Jeevan Chalke, Ashutosh Bapat, Robert Haas. The larger patch series
of which this patch is a part was also reviewed and tested by Antonin
Houska, Rajkumar Raghuwanshi, David Rowley, Dilip Kumar, Konstantin
Knizhnik, Pascal Legrand, and Rafia Sabih.
Discussion: http://postgr.es/m/CAM2+6=V64_xhstVHie0Rz=KPEQnLJMZt_e314P0jaT_oJ9MR8A@mail.gmail.com
2018-03-22 17:49:48 +01:00
|
|
|
(rel)->reloptkind == RELOPT_OTHER_JOINREL || \
|
|
|
|
(rel)->reloptkind == RELOPT_OTHER_UPPER_REL)
|
Abstract logic to allow for multiple kinds of child rels.
Currently, the only type of child relation is an "other member rel",
which is the child of a baserel, but in the future joins and even
upper relations may have child rels. To facilitate that, introduce
macros that test to test for particular RelOptKind values, and use
them in various places where they help to clarify the sense of a test.
(For example, a test may allow RELOPT_OTHER_MEMBER_REL either because
it intends to allow child rels, or because it intends to allow simple
rels.)
Also, remove find_childrel_top_parent, which will not work for a
child rel that is not a baserel. Instead, add a new RelOptInfo
member top_parent_relids to track the same kind of information in a
more generic manner.
Ashutosh Bapat, slightly tweaked by me. Review and testing of the
patch set from which this was taken by Rajkumar Raghuwanshi and Rafia
Sabih.
Discussion: http://postgr.es/m/CA+TgmoagTnF2yqR3PT2rv=om=wJiZ4-A+ATwdnriTGku1CLYxA@mail.gmail.com
2017-04-04 04:41:31 +02:00
|
|
|
|
1998-07-18 06:22:52 +02:00
|
|
|
typedef struct RelOptInfo
|
1996-08-28 03:59:28 +02:00
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
NodeTag type;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-03-12 01:52:10 +01:00
|
|
|
RelOptKind reloptkind;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* all relations included in this RelOptInfo; set of base + OJ relids
|
2022-07-02 12:33:07 +02:00
|
|
|
* (rangetable indexes)
|
|
|
|
*/
|
|
|
|
Relids relids;
|
1999-08-16 04:17:58 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* size estimates generated by planner
|
|
|
|
*/
|
|
|
|
/* estimated number of result tuples */
|
|
|
|
Cardinality rows;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* per-relation planner control flags
|
|
|
|
*/
|
|
|
|
/* keep cheap-startup-cost paths? */
|
|
|
|
bool consider_startup;
|
|
|
|
/* ditto, for parameterized paths? */
|
|
|
|
bool consider_param_startup;
|
|
|
|
/* consider parallel paths? */
|
|
|
|
bool consider_parallel;
|
2012-09-02 00:16:24 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* default result targetlist for Paths scanning this relation; list of
|
|
|
|
* Vars/Exprs, cost, width
|
|
|
|
*/
|
|
|
|
struct PathTarget *reltarget;
|
Add an explicit representation of the output targetlist to Paths.
Up to now, there's been an assumption that all Paths for a given relation
compute the same output column set (targetlist). However, there are good
reasons to remove that assumption. For example, an indexscan on an
expression index might be able to return the value of an expensive function
"for free". While we have the ability to generate such a plan today in
simple cases, we don't have a way to model that it's cheaper than a plan
that computes the function from scratch, nor a way to create such a plan
in join cases (where the function computation would normally happen at
the topmost join node). Also, we need this so that we can have Paths
representing post-scan/join steps, where the targetlist may well change
from one step to the next. Therefore, invent a "struct PathTarget"
representing the columns we expect a plan step to emit. It's convenient
to include the output tuple width and tlist evaluation cost in this struct,
and there will likely be additional fields in future.
While Path nodes that actually do have custom outputs will need their own
PathTargets, it will still be true that most Paths for a given relation
will compute the same tlist. To reduce the overhead added by this patch,
keep a "default PathTarget" in RelOptInfo, and allow Paths that compute
that column set to just point to their parent RelOptInfo's reltarget.
(In the patch as committed, actually every Path is like that, since we
do not yet have any cases of custom PathTargets.)
I took this opportunity to provide some more-honest costing of
PlaceHolderVar evaluation. Up to now, the assumption that "scan/join
reltargetlists have cost zero" was applied not only to Vars, where it's
reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval
cost of a PlaceHolderVar's expression to the first plan level where it can
be computed, by including it in the PathTarget cost field and adding that
to the cost estimates for Paths. This isn't perfect yet but it's much
better than before, and there is a way forward to improve it more. This
costing change affects the join order chosen for a couple of the regression
tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* materialization information
|
|
|
|
*/
|
1999-02-04 02:47:02 +01:00
|
|
|
List *pathlist; /* Path structures */
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
List *ppilist; /* ParamPathInfos used in pathlist */
|
2016-01-20 20:29:22 +01:00
|
|
|
List *partial_pathlist; /* partial Paths */
|
2000-02-15 21:49:31 +01:00
|
|
|
struct Path *cheapest_startup_path;
|
|
|
|
struct Path *cheapest_total_path;
|
2003-01-20 19:55:07 +01:00
|
|
|
struct Path *cheapest_unique_path;
|
2012-01-28 01:26:38 +01:00
|
|
|
List *cheapest_parameterized_paths;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* parameterization information needed for both base rels and join rels
|
|
|
|
* (see also lateral_vars and lateral_referencers)
|
|
|
|
*/
|
|
|
|
/* rels directly laterally referenced */
|
|
|
|
Relids direct_lateral_relids;
|
|
|
|
/* minimum parameterization of rel */
|
|
|
|
Relids lateral_relids;
|
2015-12-08 00:56:14 +01:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* information about a base rel (not set for join rels!)
|
|
|
|
*/
|
2003-02-08 21:20:55 +01:00
|
|
|
Index relid;
|
2022-07-02 12:33:07 +02:00
|
|
|
/* containing tablespace */
|
|
|
|
Oid reltablespace;
|
|
|
|
/* RELATION, SUBQUERY, FUNCTION, etc */
|
|
|
|
RTEKind rtekind;
|
|
|
|
/* smallest attrno of rel (often <0) */
|
|
|
|
AttrNumber min_attr;
|
|
|
|
/* largest attrno of rel */
|
|
|
|
AttrNumber max_attr;
|
|
|
|
/* array indexed [min_attr .. max_attr] */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Relids *attr_needed pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* array indexed [min_attr .. max_attr] */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
int32 *attr_widths pg_node_attr(read_write_ignore);
|
Do assorted mop-up in the planner.
Remove RestrictInfo.nullable_relids, along with a good deal of
infrastructure that calculated it. One use-case for it was in
join_clause_is_movable_to, but we can now replace that usage with
a check to see if the clause's relids include any outer join
that can null the target relation. The other use-case was in
join_clause_is_movable_into, but that test can just be dropped
entirely now that the clause's relids include outer joins.
Furthermore, join_clause_is_movable_into should now be
accurate enough that it will accept anything returned by
generate_join_implied_equalities, so we can restore the Assert
that was diked out in commit 95f4e59c3.
Remove the outerjoin_delayed mechanism. We needed this before to
prevent quals from getting evaluated below outer joins that should
null some of their vars. Now that we consider varnullingrels while
placing quals, that's taken care of automatically, so throw the
whole thing away.
Teach remove_useless_result_rtes to also remove useless FromExprs.
Having done that, the delay_upper_joins flag serves no purpose any
more and we can remove it, largely reverting 11086f2f2.
Use constant TRUE for "dummy" clauses when throwing back outer joins.
This improves on a hack I introduced in commit 6a6522529. If we
have a left-join clause l.x = r.y, and a WHERE clause l.x = constant,
we generate r.y = constant and then don't really have a need for the
join clause. But we must throw the join clause back anyway after
marking it redundant, so that the join search heuristics won't think
this is a clauseless join and avoid it. That was a kluge introduced
under time pressure, and after looking at it I thought of a better
way: let's just introduce constant-TRUE "join clauses" instead,
and get rid of them at the end. This improves the generated plans for
such cases by not having to test a redundant join clause. We can also
get rid of the ugly hack used to mark such clauses as redundant for
selectivity estimation.
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
|
|
|
/* relids of outer joins that can null this baserel */
|
|
|
|
Relids nulling_relids;
|
2022-07-02 12:33:07 +02:00
|
|
|
/* LATERAL Vars and PHVs referenced by rel */
|
|
|
|
List *lateral_vars;
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/* rels that reference this baserel laterally */
|
2022-07-02 12:33:07 +02:00
|
|
|
Relids lateral_referencers;
|
|
|
|
/* list of IndexOptInfo */
|
|
|
|
List *indexlist;
|
|
|
|
/* list of StatisticExtInfo */
|
|
|
|
List *statlist;
|
|
|
|
/* size estimates derived from pg_class */
|
|
|
|
BlockNumber pages;
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality tuples;
|
2011-10-14 23:23:01 +02:00
|
|
|
double allvisfrac;
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/* indexes in PlannerInfo's eq_classes list of ECs that mention this rel */
|
2022-07-02 12:33:07 +02:00
|
|
|
Bitmapset *eclass_indexes;
|
2011-09-03 21:35:12 +02:00
|
|
|
PlannerInfo *subroot; /* if subquery */
|
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.
The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries. This was (probably) safe before the introduction of
CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE. In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.
To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter. The planning code is
simpler and probably faster than before, as well as being more correct.
Per report from Vik Reykja.
These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
|
|
|
List *subplan_params; /* if subquery */
|
2022-07-02 12:33:07 +02:00
|
|
|
/* wanted number of parallel workers */
|
|
|
|
int rel_parallel_workers;
|
|
|
|
/* Bitmask of optional features supported by the table AM */
|
|
|
|
uint32 amflags;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Information about foreign tables and foreign joins
|
|
|
|
*/
|
|
|
|
/* identifies server for the table or join */
|
|
|
|
Oid serverid;
|
2022-11-30 12:07:03 +01:00
|
|
|
/* identifies user to check access as; 0 means to check as current user */
|
2022-07-02 12:33:07 +02:00
|
|
|
Oid userid;
|
|
|
|
/* join is only valid for current user */
|
|
|
|
bool useridiscurrent;
|
Revise FDW planning API, again.
Further reflection shows that a single callback isn't very workable if we
desire to let FDWs generate multiple Paths, because that forces the FDW to
do all work necessary to generate a valid Plan node for each Path. Instead
split the former PlanForeignScan API into three steps: GetForeignRelSize,
GetForeignPaths, GetForeignPlan. We had already bit the bullet of breaking
the 9.1 FDW API for 9.2, so this shouldn't cause very much additional pain,
and it's substantially more flexible for complex FDWs.
Add an fdw_private field to RelOptInfo so that the new functions can save
state there rather than possibly having to recalculate information two or
three times.
In addition, we'd not thought through what would be needed to allow an FDW
to set up subexpressions of its choice for runtime execution. We could
treat ForeignScan.fdw_private as an executable expression but that seems
likely to break existing FDWs unnecessarily (in particular, it would
restrict the set of node types allowable in fdw_private to those supported
by expression_tree_walker). Instead, invent a separate field fdw_exprs
which will receive the postprocessing appropriate for expression trees.
(One field is enough since it can be a list of expressions; also, we assume
the corresponding expression state tree(s) will be held within fdw_state,
so we don't need to add anything to ForeignScanState.)
Per review of Hanada Shigeru's pgsql_fdw patch. We may need to tweak this
further as we continue to work on that patch, but to me it feels a lot
closer to being right now.
2012-03-09 18:48:48 +01:00
|
|
|
/* use "struct FdwRoutine" to avoid including fdwapi.h here */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
struct FdwRoutine *fdwroutine pg_node_attr(read_write_ignore);
|
|
|
|
void *fdw_private pg_node_attr(read_write_ignore);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* cache space for remembering if we have proven this relation unique
|
|
|
|
*/
|
|
|
|
/* known unique for these other relid set(s) */
|
2022-11-13 16:22:45 +01:00
|
|
|
List *unique_for_rels;
|
2022-07-02 12:33:07 +02:00
|
|
|
/* known not unique for these set(s) */
|
2022-11-13 16:22:45 +01:00
|
|
|
List *non_unique_for_rels;
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* used by various scans and joins:
|
|
|
|
*/
|
|
|
|
/* RestrictInfo structures (if base rel) */
|
|
|
|
List *baserestrictinfo;
|
|
|
|
/* cost of evaluating the above */
|
|
|
|
QualCost baserestrictcost;
|
|
|
|
/* min security_level found in baserestrictinfo */
|
|
|
|
Index baserestrict_min_security;
|
|
|
|
/* RestrictInfo structures for join clauses involving this rel */
|
|
|
|
List *joininfo;
|
|
|
|
/* T means joininfo is incomplete */
|
|
|
|
bool has_eclass_joins;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* used by partitionwise joins:
|
|
|
|
*/
|
|
|
|
/* consider partitionwise join paths? (if partitioned rel) */
|
|
|
|
bool consider_partitionwise_join;
|
2022-08-18 18:36:06 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* inheritance links, if this is an otherrel (otherwise NULL):
|
|
|
|
*/
|
|
|
|
/* Immediate parent relation (dumping it would be too verbose) */
|
|
|
|
struct RelOptInfo *parent pg_node_attr(read_write_ignore);
|
|
|
|
/* Topmost parent relation (dumping it would be too verbose) */
|
|
|
|
struct RelOptInfo *top_parent pg_node_attr(read_write_ignore);
|
|
|
|
/* Relids of topmost parent (redundant, but handy) */
|
2022-07-02 12:33:07 +02:00
|
|
|
Relids top_parent_relids;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* used for partitioned relations:
|
|
|
|
*/
|
|
|
|
/* Partitioning scheme */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
PartitionScheme part_scheme pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Number of partitions; -1 if not yet set; in case of a join relation 0
|
|
|
|
* means it's considered unpartitioned
|
|
|
|
*/
|
2022-07-20 19:54:25 +02:00
|
|
|
int nparts;
|
2022-07-02 12:33:07 +02:00
|
|
|
/* Partition bounds */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
struct PartitionBoundInfoData *boundinfo pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* True if partition bounds were created by partition_bounds_merge() */
|
|
|
|
bool partbounds_merged;
|
|
|
|
/* Partition constraint, if not the root */
|
2022-07-20 19:54:25 +02:00
|
|
|
List *partition_qual;
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Array of RelOptInfos of partitions, stored in the same order as bounds
|
2022-07-20 19:54:25 +02:00
|
|
|
* (don't print, too bulky and duplicative)
|
2022-07-02 12:33:07 +02:00
|
|
|
*/
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
struct RelOptInfo **part_rels pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Bitmap with members acting as indexes into the part_rels[] array to
|
|
|
|
* indicate which partitions survived partition pruning.
|
|
|
|
*/
|
|
|
|
Bitmapset *live_parts;
|
|
|
|
/* Relids set of all partition relids */
|
|
|
|
Relids all_partrels;
|
2022-07-20 19:54:25 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* These arrays are of length partkey->partnatts, which we don't have at
|
|
|
|
* hand, so don't try to print
|
|
|
|
*/
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* Non-nullable partition key expressions */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
List **partexprs pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* Nullable partition key expressions */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
List **nullable_partexprs pg_node_attr(read_write_ignore);
|
1999-02-04 02:47:02 +01:00
|
|
|
} RelOptInfo;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety we can, if
it is an equi-join on the partition keys, join the matching partitions
individually. This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels. This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation. In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side. All the same, for now,
turn this feature off by default.
Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical. It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.
Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me. A few final adjustments by me.
Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
|
|
|
/*
|
|
|
|
* Is given relation partitioned?
|
|
|
|
*
|
2018-02-05 23:31:57 +01:00
|
|
|
* It's not enough to test whether rel->part_scheme is set, because it might
|
|
|
|
* be that the basic partitioning properties of the input relations matched
|
Fix handling of targetlist SRFs when scan/join relation is known empty.
When we introduced separate ProjectSetPath nodes for application of
set-returning functions in v10, we inadvertently broke some cases where
we're supposed to recognize that the result of a subquery is known to be
empty (contain zero rows). That's because IS_DUMMY_REL was just looking
for a childless AppendPath without allowing for a ProjectSetPath being
possibly stuck on top. In itself, this didn't do anything much worse
than produce slightly worse plans for some corner cases.
Then in v11, commit 11cf92f6e rearranged things to allow the scan/join
targetlist to be applied directly to partial paths before they get
gathered. But it inserted a short-circuit path for dummy relations
that was a little too short: it failed to insert a ProjectSetPath node
at all for a targetlist containing set-returning functions, resulting in
bogus "set-valued function called in context that cannot accept a set"
errors, as reported in bug #15669 from Madelaine Thibaut.
The best way to fix this mess seems to be to reimplement IS_DUMMY_REL
so that it drills down through any ProjectSetPath nodes that might be
there (and it seems like we'd better allow for ProjectionPath as well).
While we're at it, make it look at rel->pathlist not cheapest_total_path,
so that it gives the right answer independently of whether set_cheapest
has been done lately. That dependency looks pretty shaky in the context
of code like apply_scanjoin_target_to_paths, and even if it's not broken
today it'd certainly bite us at some point. (Nastily, unsafe use of the
old coding would almost always work; the hazard comes down to possibly
looking through a dangling pointer, and only once in a blue moon would
you find something there that resulted in the wrong answer.)
It now looks like it was a mistake for IS_DUMMY_REL to be a macro: if
there are any extensions using it, they'll continue to use the old
inadequate logic until they're recompiled, after which they'll fail
to load into server versions predating this fix. Hopefully there are
few such extensions.
Having fixed IS_DUMMY_REL, the special path for dummy rels in
apply_scanjoin_target_to_paths is unnecessary as well as being wrong,
so we can just drop it.
Also change a few places that were testing for partitioned-ness of a
planner relation but not using IS_PARTITIONED_REL for the purpose; that
seems unsafe as well as inconsistent, plus it required an ugly hack in
apply_scanjoin_target_to_paths.
In passing, save a few cycles in apply_scanjoin_target_to_paths by
skipping processing of pre-existing paths for partitioned rels,
and do some cosmetic cleanup and comment adjustment in that function.
I renamed IS_DUMMY_PATH to IS_DUMMY_APPEND with the intention of breaking
any code that might be using it, since in almost every case that would
be wrong; IS_DUMMY_REL is what to be using instead.
In HEAD, also make set_dummy_rel_pathlist static (since it's no longer
used from outside allpaths.c), and delete is_dummy_plan, since it's no
longer used anywhere.
Back-patch as appropriate into v11 and v10.
Tom Lane and Julien Rouhaud
Discussion: https://postgr.es/m/15669-02fb3296cca26203@postgresql.org
2019-03-07 20:21:52 +01:00
|
|
|
* but the partition bounds did not. Also, if we are able to prove a rel
|
|
|
|
* dummy (empty), we should henceforth treat it as unpartitioned.
|
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety we can, if
it is an equi-join on the partition keys, join the matching partitions
individually. This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels. This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation. In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side. All the same, for now,
turn this feature off by default.
Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical. It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.
Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me. A few final adjustments by me.
Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
|
|
|
*/
|
|
|
|
#define IS_PARTITIONED_REL(rel) \
|
2018-02-05 23:31:57 +01:00
|
|
|
((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \
|
Fix handling of targetlist SRFs when scan/join relation is known empty.
When we introduced separate ProjectSetPath nodes for application of
set-returning functions in v10, we inadvertently broke some cases where
we're supposed to recognize that the result of a subquery is known to be
empty (contain zero rows). That's because IS_DUMMY_REL was just looking
for a childless AppendPath without allowing for a ProjectSetPath being
possibly stuck on top. In itself, this didn't do anything much worse
than produce slightly worse plans for some corner cases.
Then in v11, commit 11cf92f6e rearranged things to allow the scan/join
targetlist to be applied directly to partial paths before they get
gathered. But it inserted a short-circuit path for dummy relations
that was a little too short: it failed to insert a ProjectSetPath node
at all for a targetlist containing set-returning functions, resulting in
bogus "set-valued function called in context that cannot accept a set"
errors, as reported in bug #15669 from Madelaine Thibaut.
The best way to fix this mess seems to be to reimplement IS_DUMMY_REL
so that it drills down through any ProjectSetPath nodes that might be
there (and it seems like we'd better allow for ProjectionPath as well).
While we're at it, make it look at rel->pathlist not cheapest_total_path,
so that it gives the right answer independently of whether set_cheapest
has been done lately. That dependency looks pretty shaky in the context
of code like apply_scanjoin_target_to_paths, and even if it's not broken
today it'd certainly bite us at some point. (Nastily, unsafe use of the
old coding would almost always work; the hazard comes down to possibly
looking through a dangling pointer, and only once in a blue moon would
you find something there that resulted in the wrong answer.)
It now looks like it was a mistake for IS_DUMMY_REL to be a macro: if
there are any extensions using it, they'll continue to use the old
inadequate logic until they're recompiled, after which they'll fail
to load into server versions predating this fix. Hopefully there are
few such extensions.
Having fixed IS_DUMMY_REL, the special path for dummy rels in
apply_scanjoin_target_to_paths is unnecessary as well as being wrong,
so we can just drop it.
Also change a few places that were testing for partitioned-ness of a
planner relation but not using IS_PARTITIONED_REL for the purpose; that
seems unsafe as well as inconsistent, plus it required an ugly hack in
apply_scanjoin_target_to_paths.
In passing, save a few cycles in apply_scanjoin_target_to_paths by
skipping processing of pre-existing paths for partitioned rels,
and do some cosmetic cleanup and comment adjustment in that function.
I renamed IS_DUMMY_PATH to IS_DUMMY_APPEND with the intention of breaking
any code that might be using it, since in almost every case that would
be wrong; IS_DUMMY_REL is what to be using instead.
In HEAD, also make set_dummy_rel_pathlist static (since it's no longer
used from outside allpaths.c), and delete is_dummy_plan, since it's no
longer used anywhere.
Back-patch as appropriate into v11 and v10.
Tom Lane and Julien Rouhaud
Discussion: https://postgr.es/m/15669-02fb3296cca26203@postgresql.org
2019-03-07 20:21:52 +01:00
|
|
|
(rel)->part_rels && !IS_DUMMY_REL(rel))
|
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety we can, if
it is an equi-join on the partition keys, join the matching partitions
individually. This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels. This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation. In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side. All the same, for now,
turn this feature off by default.
Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical. It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.
Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me. A few final adjustments by me.
Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
2017-10-06 17:11:10 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Convenience macro to make sure that a partitioned relation has all the
|
|
|
|
* required members set.
|
|
|
|
*/
|
|
|
|
#define REL_HAS_ALL_PART_PROPS(rel) \
|
|
|
|
((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \
|
|
|
|
(rel)->part_rels && (rel)->partexprs && (rel)->nullable_partexprs)
|
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
/*
|
|
|
|
* IndexOptInfo
|
|
|
|
* Per-index information for planning/optimization
|
|
|
|
*
|
2018-04-07 22:00:39 +02:00
|
|
|
* indexkeys[], indexcollations[] each have ncolumns entries.
|
|
|
|
* opfamily[], and opcintype[] each have nkeycolumns entries. They do
|
|
|
|
* not contain any information about included attributes.
|
2011-03-26 21:35:25 +01:00
|
|
|
*
|
2018-04-07 22:00:39 +02:00
|
|
|
* sortopfamily[], reverse_sort[], and nulls_first[] have
|
|
|
|
* nkeycolumns entries, if the index is ordered; but if it is unordered,
|
2010-11-29 18:29:42 +01:00
|
|
|
* those pointers are NULL.
|
2007-01-09 03:14:16 +01:00
|
|
|
*
|
2003-05-28 18:04:02 +02:00
|
|
|
* Zeroes in the indexkeys[] array indicate index columns that are
|
|
|
|
* expressions; there is one element in indexprs for each such column.
|
2001-05-20 22:28:20 +02:00
|
|
|
*
|
2010-11-29 18:29:42 +01:00
|
|
|
* For an ordered index, reverse_sort[] and nulls_first[] describe the
|
|
|
|
* sort ordering of a forward indexscan; we can also consider a backward
|
|
|
|
* indexscan, which will generate the reverse ordering.
|
2003-05-28 18:04:02 +02:00
|
|
|
*
|
|
|
|
* The indexprs and indpred expressions have been run through
|
2003-12-28 22:57:37 +01:00
|
|
|
* prepqual.c and eval_const_expressions() for ease of matching to
|
2007-01-09 03:14:16 +01:00
|
|
|
* WHERE clauses. indpred is in implicit-AND form.
|
2011-10-11 20:20:06 +02:00
|
|
|
*
|
|
|
|
* indextlist is a TargetEntry list representing the index columns.
|
|
|
|
* It provides an equivalent base-relation Var for each simple column,
|
|
|
|
* and links to the matching indexprs element for each expression column.
|
Support using index-only scans with partial indexes in more cases.
Previously, the planner would reject an index-only scan if any restriction
clause for its table used a column not available from the index, even
if that restriction clause would later be dropped from the plan entirely
because it's implied by the index's predicate. This is a fairly common
situation for partial indexes because predicates using columns not included
in the index are often the most useful kind of predicate, and we have to
duplicate (or at least imply) the predicate in the WHERE clause in order
to get the index to be considered at all. So index-only scans were
essentially unavailable with such partial indexes.
To fix, we have to do detection of implied-by-predicate clauses much
earlier in the planner. This patch puts it in check_index_predicates
(nee check_partial_indexes), meaning it gets done for every partial index,
whereas we previously only considered this issue at createplan time,
so that the work was only done for an index actually selected for use.
That could result in a noticeable planning slowdown for queries against
tables with many partial indexes. However, testing suggested that there
isn't really a significant cost, especially not with reasonable numbers
of partial indexes. We do get a small additional benefit, which is that
cost_index is more accurate since it correctly discounts the evaluation
cost of clauses that will be removed. We can also avoid considering such
clauses as potential indexquals, which saves useless matching cycles in
the case where the predicate columns aren't in the index, and prevents
generating bogus plans that double-count the clause's selectivity when
the columns are in the index.
Tomas Vondra and Kyotaro Horiguchi, reviewed by Kevin Grittner and
Konstantin Knizhnik, and whacked around a little by me
2016-03-31 20:48:56 +02:00
|
|
|
*
|
|
|
|
* While most of these fields are filled when the IndexOptInfo is created
|
|
|
|
* (by plancat.c), indrestrictinfo and predOK are set later, in
|
|
|
|
* check_index_predicates().
|
2000-01-09 01:26:47 +01:00
|
|
|
*/
|
2019-02-12 03:26:08 +01:00
|
|
|
#ifndef HAVE_INDEXOPTINFO_TYPEDEF
|
|
|
|
typedef struct IndexOptInfo IndexOptInfo;
|
|
|
|
#define HAVE_INDEXOPTINFO_TYPEDEF 1
|
|
|
|
#endif
|
|
|
|
|
|
|
|
struct IndexOptInfo
|
2000-01-09 01:26:47 +01:00
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
NodeTag type;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* OID of the index relation */
|
|
|
|
Oid indexoid;
|
|
|
|
/* tablespace of index (not table) */
|
|
|
|
Oid reltablespace;
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
/* back-link to index's table; don't print, else infinite recursion */
|
|
|
|
RelOptInfo *rel pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* index-size statistics (from pg_class and elsewhere)
|
|
|
|
*/
|
|
|
|
/* number of disk pages in index */
|
|
|
|
BlockNumber pages;
|
|
|
|
/* number of index tuples in index */
|
|
|
|
Cardinality tuples;
|
|
|
|
/* index tree height, or -1 if unknown */
|
|
|
|
int tree_height;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* index descriptor information
|
|
|
|
*/
|
|
|
|
/* number of columns in index */
|
|
|
|
int ncolumns;
|
|
|
|
/* number of key columns in index */
|
|
|
|
int nkeycolumns;
|
|
|
|
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
/*
|
2022-07-20 19:54:25 +02:00
|
|
|
* table column numbers of index's columns (both key and included
|
|
|
|
* columns), or 0 for expression columns
|
2022-07-02 12:33:07 +02:00
|
|
|
*/
|
2022-07-20 19:54:25 +02:00
|
|
|
int *indexkeys pg_node_attr(array_size(ncolumns));
|
2022-07-02 12:33:07 +02:00
|
|
|
/* OIDs of collations of index columns */
|
2022-07-20 19:54:25 +02:00
|
|
|
Oid *indexcollations pg_node_attr(array_size(nkeycolumns));
|
2022-07-02 12:33:07 +02:00
|
|
|
/* OIDs of operator families for columns */
|
2022-07-20 19:54:25 +02:00
|
|
|
Oid *opfamily pg_node_attr(array_size(nkeycolumns));
|
2022-07-02 12:33:07 +02:00
|
|
|
/* OIDs of opclass declared input data types */
|
2022-07-20 19:54:25 +02:00
|
|
|
Oid *opcintype pg_node_attr(array_size(nkeycolumns));
|
2023-01-09 05:15:08 +01:00
|
|
|
/* OIDs of btree opfamilies, if orderable. NULL if partitioned index */
|
2022-07-20 19:54:25 +02:00
|
|
|
Oid *sortopfamily pg_node_attr(array_size(nkeycolumns));
|
2023-01-09 05:15:08 +01:00
|
|
|
/* is sort order descending? or NULL if partitioned index */
|
2022-07-20 19:54:25 +02:00
|
|
|
bool *reverse_sort pg_node_attr(array_size(nkeycolumns));
|
2023-01-09 05:15:08 +01:00
|
|
|
/* do NULLs come first in the sort order? or NULL if partitioned index */
|
2022-07-20 19:54:25 +02:00
|
|
|
bool *nulls_first pg_node_attr(array_size(nkeycolumns));
|
2022-07-02 12:33:07 +02:00
|
|
|
/* opclass-specific options for columns */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
bytea **opclassoptions pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* which index cols can be returned in an index-only scan? */
|
2022-07-20 19:54:25 +02:00
|
|
|
bool *canreturn pg_node_attr(array_size(ncolumns));
|
2022-07-02 12:33:07 +02:00
|
|
|
/* OID of the access method (in pg_am) */
|
|
|
|
Oid relam;
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* expressions for non-simple index columns; redundant to print since we
|
|
|
|
* print indextlist
|
|
|
|
*/
|
|
|
|
List *indexprs pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* predicate if a partial index, else NIL */
|
|
|
|
List *indpred;
|
|
|
|
|
|
|
|
/* targetlist representing index columns */
|
|
|
|
List *indextlist;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* parent relation's baserestrictinfo list, less any conditions implied by
|
|
|
|
* the index's predicate (unless it's a target rel, see comments in
|
|
|
|
* check_index_predicates())
|
|
|
|
*/
|
|
|
|
List *indrestrictinfo;
|
|
|
|
|
|
|
|
/* true if index predicate matches query */
|
|
|
|
bool predOK;
|
|
|
|
/* true if a unique index */
|
|
|
|
bool unique;
|
|
|
|
/* is uniqueness enforced immediately? */
|
|
|
|
bool immediate;
|
|
|
|
/* true if index doesn't really exist */
|
|
|
|
bool hypothetical;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remaining fields are copied from the index AM's API struct
|
2023-01-09 05:15:08 +01:00
|
|
|
* (IndexAmRoutine). These fields are not set for partitioned indexes.
|
2022-07-02 12:33:07 +02:00
|
|
|
*/
|
2022-07-20 19:54:25 +02:00
|
|
|
bool amcanorderbyop;
|
|
|
|
bool amoptionalkey;
|
|
|
|
bool amsearcharray;
|
|
|
|
bool amsearchnulls;
|
2022-07-02 12:33:07 +02:00
|
|
|
/* does AM have amgettuple interface? */
|
2022-07-20 19:54:25 +02:00
|
|
|
bool amhasgettuple;
|
2022-07-02 12:33:07 +02:00
|
|
|
/* does AM have amgetbitmap interface? */
|
2022-07-20 19:54:25 +02:00
|
|
|
bool amhasgetbitmap;
|
|
|
|
bool amcanparallel;
|
2022-07-02 12:33:07 +02:00
|
|
|
/* does AM have ammarkpos interface? */
|
2022-07-20 19:54:25 +02:00
|
|
|
bool amcanmarkpos;
|
2022-07-14 15:04:23 +02:00
|
|
|
/* AM's cost estimator */
|
2019-12-27 00:09:00 +01:00
|
|
|
/* Rather than include amapi.h here, we declare amcostestimate like this */
|
2022-07-14 15:04:23 +02:00
|
|
|
void (*amcostestimate) () pg_node_attr(read_write_ignore);
|
2019-02-12 03:26:08 +01:00
|
|
|
};
|
2000-01-09 01:26:47 +01:00
|
|
|
|
2016-06-18 21:22:34 +02:00
|
|
|
/*
|
|
|
|
* ForeignKeyOptInfo
|
|
|
|
* Per-foreign-key information for planning/optimization
|
|
|
|
*
|
|
|
|
* The per-FK-column arrays can be fixed-size because we allow at most
|
|
|
|
* INDEX_MAX_KEYS columns in a foreign key constraint. Each array has
|
|
|
|
* nkeys valid entries.
|
|
|
|
*/
|
|
|
|
typedef struct ForeignKeyOptInfo
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(custom_read_write, no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2016-06-18 21:22:34 +02:00
|
|
|
NodeTag type;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* Basic data about the foreign key (fetched from catalogs):
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* RT index of the referencing table */
|
|
|
|
Index con_relid;
|
|
|
|
/* RT index of the referenced table */
|
|
|
|
Index ref_relid;
|
|
|
|
/* number of columns in the foreign key */
|
|
|
|
int nkeys;
|
|
|
|
/* cols in referencing table */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
AttrNumber conkey[INDEX_MAX_KEYS] pg_node_attr(array_size(nkeys));
|
2022-07-02 12:33:07 +02:00
|
|
|
/* cols in referenced table */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
AttrNumber confkey[INDEX_MAX_KEYS] pg_node_attr(array_size(nkeys));
|
2022-07-02 12:33:07 +02:00
|
|
|
/* PK = FK operator OIDs */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Oid conpfeqop[INDEX_MAX_KEYS] pg_node_attr(array_size(nkeys));
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Derived info about whether FK's equality conditions match the query:
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* # of FK cols matched by ECs */
|
|
|
|
int nmatched_ec;
|
|
|
|
/* # of these ECs that are ec_has_const */
|
|
|
|
int nconst_ec;
|
|
|
|
/* # of FK cols matched by non-EC rinfos */
|
|
|
|
int nmatched_rcols;
|
|
|
|
/* total # of non-EC rinfos matched to FK */
|
|
|
|
int nmatched_ri;
|
2016-06-18 21:22:34 +02:00
|
|
|
/* Pointer to eclass matching each column's condition, if there is one */
|
|
|
|
struct EquivalenceClass *eclass[INDEX_MAX_KEYS];
|
Fix foreign-key selectivity estimation in the presence of constants.
get_foreign_key_join_selectivity() looks for join clauses that equate
the two sides of the FK constraint. However, if we have a query like
"WHERE fktab.a = pktab.a and fktab.a = 1", it won't find any such join
clause, because equivclass.c replaces the given clauses with "fktab.a
= 1 and pktab.a = 1", which can be enforced at the scan level, leaving
nothing to be done for column "a" at the join level.
We can fix that expectation without much trouble, but then a new problem
arises: applying the foreign-key-based selectivity rule produces a
rowcount underestimate, because we're effectively double-counting the
selectivity of the "fktab.a = 1" clause. So we have to cancel that
selectivity out of the estimate.
To fix, refactor process_implied_equality() so that it can pass back the
new RestrictInfo to its callers in equivclass.c, allowing the generated
"fktab.a = 1" clause to be saved in the EquivalenceClass's ec_derives
list. Then it's not much trouble to dig out the relevant RestrictInfo
when we need to adjust an FK selectivity estimate. (While at it, we
can also remove the expensive use of initialize_mergeclause_eclasses()
to set up the new RestrictInfo's left_ec and right_ec pointers.
The equivclass.c code can set those basically for free.)
This seems like clearly a bug fix, but I'm hesitant to back-patch it,
first because there's some API/ABI risk for extensions and second because
we're usually loath to destabilize plan choices in stable branches.
Per report from Sigrid Ehrenreich.
Discussion: https://postgr.es/m/1019549.1603770457@sss.pgh.pa.us
Discussion: https://postgr.es/m/AM6PR02MB5287A0ADD936C1FA80973E72AB190@AM6PR02MB5287.eurprd02.prod.outlook.com
2020-10-28 16:15:47 +01:00
|
|
|
/* Pointer to eclass member for the referencing Var, if there is one */
|
|
|
|
struct EquivalenceMember *fk_eclass_member[INDEX_MAX_KEYS];
|
2016-06-18 21:22:34 +02:00
|
|
|
/* List of non-EC RestrictInfos matching each column's condition */
|
|
|
|
List *rinfos[INDEX_MAX_KEYS];
|
Avoid invalidating all foreign-join cached plans when user mappings change.
We must not push down a foreign join when the foreign tables involved
should be accessed under different user mappings. Previously we tried
to enforce that rule literally during planning, but that meant that the
resulting plans were dependent on the current contents of the
pg_user_mapping catalog, and we had to blow away all cached plans
containing any remote join when anything at all changed in pg_user_mapping.
This could have been improved somewhat, but the fact that a syscache inval
callback has very limited info about what changed made it hard to do better
within that design. Instead, let's change the planner to not consider user
mappings per se, but to allow a foreign join if both RTEs have the same
checkAsUser value. If they do, then they necessarily will use the same
user mapping at runtime, and we don't need to know specifically which one
that is. Post-plan-time changes in pg_user_mapping no longer require any
plan invalidation.
This rule does give up some optimization ability, to wit where two foreign
table references come from views with different owners or one's from a view
and one's directly in the query, but nonetheless the same user mapping
would have applied. We'll sacrifice the first case, but to not regress
more than we have to in the second case, allow a foreign join involving
both zero and nonzero checkAsUser values if the nonzero one is the same as
the prevailing effective userID. In that case, mark the plan as only
runnable by that userID.
The plancache code already had a notion of plans being userID-specific,
in order to support RLS. It was a little confused though, in particular
lacking clarity of thought as to whether it was the rewritten query or just
the finished plan that's dependent on the userID. Rearrange that code so
that it's clearer what depends on which, and so that the same logic applies
to both RLS-injected role dependency and foreign-join-injected role
dependency.
Note that this patch doesn't remove the other issue mentioned in the
original complaint, which is that while we'll reliably stop using a foreign
join if it's disallowed in a new context, we might fail to start using a
foreign join if it's now allowed, but we previously created a generic
cached plan that didn't use one. It was agreed that the chance of winning
that way was not high enough to justify the much larger number of plan
invalidations that would have to occur if we tried to cause it to happen.
In passing, clean up randomly-varying spelling of EXPLAIN commands in
postgres_fdw.sql, and fix a COSTS ON example that had been allowed to
leak into the committed tests.
This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the
previous attempt at ensuring we wouldn't push down foreign joins that
span permissions contexts.
Etsuro Fujita and Tom Lane
Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
|
|
|
} ForeignKeyOptInfo;
|
2016-06-18 21:22:34 +02:00
|
|
|
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 18:06:10 +01:00
|
|
|
/*
|
|
|
|
* StatisticExtInfo
|
|
|
|
* Information about extended statistics for planning/optimization
|
|
|
|
*
|
2017-04-06 17:27:15 +02:00
|
|
|
* Each pg_statistic_ext row is represented by one or more nodes of this
|
|
|
|
* type, or even zero if ANALYZE has not computed them.
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 18:06:10 +01:00
|
|
|
*/
|
|
|
|
typedef struct StatisticExtInfo
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 18:06:10 +01:00
|
|
|
NodeTag type;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* OID of the statistics row */
|
|
|
|
Oid statOid;
|
|
|
|
|
|
|
|
/* includes child relations */
|
2022-07-20 19:54:25 +02:00
|
|
|
bool inherit;
|
2022-07-02 12:33:07 +02:00
|
|
|
|
2022-07-20 19:54:25 +02:00
|
|
|
/* back-link to statistic's table; don't print, else infinite recursion */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
RelOptInfo *rel pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/* statistics kind of this entry */
|
|
|
|
char kind;
|
|
|
|
|
|
|
|
/* attnums of the columns covered */
|
|
|
|
Bitmapset *keys;
|
|
|
|
|
|
|
|
/* expressions */
|
|
|
|
List *exprs;
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 18:06:10 +01:00
|
|
|
} StatisticExtInfo;
|
2002-03-12 01:52:10 +01:00
|
|
|
|
2023-01-30 19:50:25 +01:00
|
|
|
/*
|
|
|
|
* JoinDomains
|
|
|
|
*
|
|
|
|
* A "join domain" defines the scope of applicability of deductions made via
|
|
|
|
* the EquivalenceClass mechanism. Roughly speaking, a join domain is a set
|
|
|
|
* of base+OJ relations that are inner-joined together. More precisely, it is
|
|
|
|
* the set of relations at which equalities deduced from an EquivalenceClass
|
|
|
|
* can be enforced or should be expected to hold. The topmost JoinDomain
|
|
|
|
* covers the whole query (so its jd_relids should equal all_query_rels).
|
|
|
|
* An outer join creates a new JoinDomain that includes all base+OJ relids
|
|
|
|
* within its nullable side, but (by convention) not the OJ's own relid.
|
|
|
|
* A FULL join creates two new JoinDomains, one for each side.
|
|
|
|
*
|
|
|
|
* Notice that a rel that is below outer join(s) will thus appear to belong
|
|
|
|
* to multiple join domains. However, any of its Vars that appear in
|
|
|
|
* EquivalenceClasses belonging to higher join domains will have nullingrel
|
|
|
|
* bits preventing them from being evaluated at the rel's scan level, so that
|
|
|
|
* we will not be able to derive enforceable-at-the-rel-scan-level clauses
|
|
|
|
* from such ECs. We define the join domain relid sets this way so that
|
|
|
|
* domains can be said to be "higher" or "lower" when one domain relid set
|
|
|
|
* includes another.
|
|
|
|
*
|
|
|
|
* The JoinDomains for a query are computed in deconstruct_jointree.
|
|
|
|
* We do not copy JoinDomain structs once made, so they can be compared
|
|
|
|
* for equality by simple pointer equality.
|
|
|
|
*/
|
|
|
|
typedef struct JoinDomain
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
2023-01-30 19:50:25 +01:00
|
|
|
|
|
|
|
NodeTag type;
|
|
|
|
|
|
|
|
Relids jd_relids; /* all relids contained within the domain */
|
|
|
|
} JoinDomain;
|
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
/*
|
|
|
|
* EquivalenceClasses
|
|
|
|
*
|
2023-01-30 19:50:25 +01:00
|
|
|
* Whenever we identify a mergejoinable equality clause A = B that is
|
|
|
|
* not an outer-join clause, we create an EquivalenceClass containing
|
2007-01-20 21:45:41 +01:00
|
|
|
* the expressions A and B to record this knowledge. If we later find another
|
|
|
|
* equivalence B = C, we add C to the existing EquivalenceClass; this may
|
|
|
|
* require merging two existing EquivalenceClasses. At the end of the qual
|
|
|
|
* distribution process, we have sets of values that are known all transitively
|
|
|
|
* equal to each other, where "equal" is according to the rules of the btree
|
2011-03-20 01:29:08 +01:00
|
|
|
* operator family(s) shown in ec_opfamilies, as well as the collation shown
|
|
|
|
* by ec_collation. (We restrict an EC to contain only equalities whose
|
|
|
|
* operators belong to the same set of opfamilies. This could probably be
|
|
|
|
* relaxed, but for now it's not worth the trouble, since nearly all equality
|
|
|
|
* operators belong to only one btree opclass anyway. Similarly, we suppose
|
|
|
|
* that all or none of the input datatypes are collatable, so that a single
|
|
|
|
* collation value is sufficient.)
|
2007-01-20 21:45:41 +01:00
|
|
|
*
|
2023-01-30 19:50:25 +01:00
|
|
|
* Strictly speaking, deductions from an EquivalenceClass hold only within
|
|
|
|
* a "join domain", that is a set of relations that are innerjoined together
|
|
|
|
* (see JoinDomain above). For the most part we don't need to account for
|
|
|
|
* this explicitly, because equality clauses from different join domains
|
|
|
|
* will contain Vars that are not equal() because they have different
|
|
|
|
* nullingrel sets, and thus we will never falsely merge ECs from different
|
|
|
|
* join domains. But Var-free (pseudoconstant) expressions lack that safety
|
|
|
|
* feature. We handle that by marking "const" EC members with the JoinDomain
|
|
|
|
* of the clause they came from; two nominally-equal const members will be
|
|
|
|
* considered different if they came from different JoinDomains. This ensures
|
|
|
|
* no false EquivalenceClass merges will occur.
|
|
|
|
*
|
2007-01-20 21:45:41 +01:00
|
|
|
* We also use EquivalenceClasses as the base structure for PathKeys, letting
|
|
|
|
* us represent knowledge about different sort orderings being equivalent.
|
|
|
|
* Since every PathKey must reference an EquivalenceClass, we will end up
|
|
|
|
* with single-member EquivalenceClasses whenever a sort key expression has
|
|
|
|
* not been equivalenced to anything else. It is also possible that such an
|
|
|
|
* EquivalenceClass will contain a volatile expression ("ORDER BY random()"),
|
|
|
|
* which is a case that can't arise otherwise since clauses containing
|
|
|
|
* volatile functions are never considered mergejoinable. We mark such
|
|
|
|
* EquivalenceClasses specially to prevent them from being merged with
|
2007-11-08 22:49:48 +01:00
|
|
|
* ordinary EquivalenceClasses. Also, for volatile expressions we have
|
|
|
|
* to be careful to match the EquivalenceClass to the correct targetlist
|
|
|
|
* entry: consider SELECT random() AS a, random() AS b ... ORDER BY b,a.
|
|
|
|
* So we record the SortGroupRef of the originating sort clause.
|
2007-01-20 21:45:41 +01:00
|
|
|
*
|
|
|
|
* NB: if ec_merged isn't NULL, this class has been merged into another, and
|
|
|
|
* should be ignored in favor of using the pointed-to class.
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
*
|
|
|
|
* NB: EquivalenceClasses are never copied after creation. Therefore,
|
|
|
|
* copyObject() copies pointers to them as pointers, and equal() compares
|
2022-12-02 21:20:30 +01:00
|
|
|
* pointers to EquivalenceClasses via pointer equality. This is implemented
|
|
|
|
* by putting copy_as_scalar and equal_as_scalar attributes on fields that
|
|
|
|
* are pointers to EquivalenceClasses. The same goes for EquivalenceMembers.
|
2007-01-20 21:45:41 +01:00
|
|
|
*/
|
|
|
|
typedef struct EquivalenceClass
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(custom_read_write, no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
NodeTag type;
|
|
|
|
|
|
|
|
List *ec_opfamilies; /* btree operator family OIDs */
|
2011-03-20 01:29:08 +01:00
|
|
|
Oid ec_collation; /* collation, if datatypes are collatable */
|
2007-01-20 21:45:41 +01:00
|
|
|
List *ec_members; /* list of EquivalenceMembers */
|
|
|
|
List *ec_sources; /* list of generating RestrictInfos */
|
2007-01-22 21:00:40 +01:00
|
|
|
List *ec_derives; /* list of derived RestrictInfos */
|
2014-03-05 22:00:22 +01:00
|
|
|
Relids ec_relids; /* all relids appearing in ec_members, except
|
|
|
|
* for child members (see below) */
|
2007-01-20 21:45:41 +01:00
|
|
|
bool ec_has_const; /* any pseudoconstants in ec_members? */
|
|
|
|
bool ec_has_volatile; /* the (sole) member is a volatile expr */
|
|
|
|
bool ec_broken; /* failed to generate needed clauses? */
|
2007-11-08 22:49:48 +01:00
|
|
|
Index ec_sortref; /* originating sortclause label, or 0 */
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
Index ec_min_security; /* minimum security_level in ec_sources */
|
|
|
|
Index ec_max_security; /* maximum security_level in ec_sources */
|
2007-01-20 21:45:41 +01:00
|
|
|
struct EquivalenceClass *ec_merged; /* set if merged into another EC */
|
|
|
|
} EquivalenceClass;
|
|
|
|
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- ie, given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
/*
|
2023-01-30 19:50:25 +01:00
|
|
|
* If an EC contains a constant, any PathKey depending on it must be
|
|
|
|
* redundant, since there's only one possible value of the key.
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- ie, given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
*/
|
|
|
|
#define EC_MUST_BE_REDUNDANT(eclass) \
|
2023-01-30 19:50:25 +01:00
|
|
|
((eclass)->ec_has_const)
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- ie, given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
/*
|
|
|
|
* EquivalenceMember - one member expression of an EquivalenceClass
|
|
|
|
*
|
|
|
|
* em_is_child signifies that this element was built by transposing a member
|
Revisit handling of UNION ALL subqueries with non-Var output columns.
In commit 57664ed25e5dea117158a2e663c29e60b3546e1c I tried to fix a bug
reported by Teodor Sigaev by making non-simple-Var output columns distinct
(by wrapping their expressions with dummy PlaceHolderVar nodes). This did
not work too well. Commit b28ffd0fcc583c1811e5295279e7d4366c3cae6c fixed
some ensuing problems with matching to child indexes, but per a recent
report from Claus Stadler, constraint exclusion of UNION ALL subqueries was
still broken, because constant-simplification didn't handle the injected
PlaceHolderVars well either. On reflection, the original patch was quite
misguided: there is no reason to expect that EquivalenceClass child members
will be distinct. So instead of trying to make them so, we should ensure
that we can cope with the situation when they're not.
Accordingly, this patch reverts the code changes in the above-mentioned
commits (though the regression test cases they added stay). Instead, I've
added assorted defenses to make sure that duplicate EC child members don't
cause any problems. Teodor's original problem ("MergeAppend child's
targetlist doesn't match MergeAppend") is addressed more directly by
revising prepare_sort_from_pathkeys to let the parent MergeAppend's sort
list guide creation of each child's sort list.
In passing, get rid of add_sort_column; as far as I can tell, testing for
duplicate sort keys at this stage is dead code. Certainly it doesn't
trigger often enough to be worth expending cycles on in ordinary queries.
And keeping the test would've greatly complicated the new logic in
prepare_sort_from_pathkeys, because comparing pathkey list entries against
a previous output array requires that we not skip any entries in the list.
Back-patch to 9.1, like the previous patches. The only known issue in
this area that wasn't caused by the ill-advised previous patches was the
MergeAppend planning failure, which of course is not relevant before 9.1.
It's possible that we need some of the new defenses against duplicate child
EC entries in older branches, but until there's some clear evidence of that
I'm going to refrain from back-patching further.
2012-03-16 18:11:12 +01:00
|
|
|
* for an appendrel parent relation to represent the corresponding expression
|
|
|
|
* for an appendrel child. These members are used for determining the
|
|
|
|
* pathkeys of scans on the child relation and for explicitly sorting the
|
|
|
|
* child when necessary to build a MergeAppend path for the whole appendrel
|
|
|
|
* tree. An em_is_child member has no impact on the properties of the EC as a
|
|
|
|
* whole; in particular the EC's ec_relids field does NOT include the child
|
|
|
|
* relation. An em_is_child member should never be marked em_is_const nor
|
|
|
|
* cause ec_has_const or ec_has_volatile to be set, either. Thus, em_is_child
|
|
|
|
* members are not really full-fledged members of the EC, but just reflections
|
|
|
|
* or doppelgangers of real members. Most operations on EquivalenceClasses
|
|
|
|
* should ignore em_is_child members, and those that don't should test
|
|
|
|
* em_relids to make sure they only consider relevant members.
|
2007-01-20 21:45:41 +01:00
|
|
|
*
|
|
|
|
* em_datatype is usually the same as exprType(em_expr), but can be
|
|
|
|
* different when dealing with a binary-compatible opfamily; in particular
|
|
|
|
* anyarray_ops would never work without this. Use em_datatype when
|
|
|
|
* looking up a specific btree operator to work with this expression.
|
|
|
|
*/
|
|
|
|
typedef struct EquivalenceMember
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
NodeTag type;
|
|
|
|
|
|
|
|
Expr *em_expr; /* the expression represented */
|
|
|
|
Relids em_relids; /* all relids appearing in em_expr */
|
|
|
|
bool em_is_const; /* expression is pseudoconstant? */
|
|
|
|
bool em_is_child; /* derived version for a child relation? */
|
|
|
|
Oid em_datatype; /* the "nominal type" used by the opfamily */
|
2023-01-30 19:50:25 +01:00
|
|
|
JoinDomain *em_jdomain; /* join domain containing the source clause */
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/* if em_is_child is true, this links to corresponding EM for top parent */
|
|
|
|
struct EquivalenceMember *em_parent pg_node_attr(read_write_ignore);
|
2007-01-20 21:45:41 +01:00
|
|
|
} EquivalenceMember;
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/*
|
|
|
|
* PathKeys
|
|
|
|
*
|
2007-01-20 21:45:41 +01:00
|
|
|
* The sort ordering of a path is represented by a list of PathKey nodes.
|
|
|
|
* An empty list implies no known ordering. Otherwise the first item
|
|
|
|
* represents the primary sort key, the second the first secondary sort key,
|
|
|
|
* etc. The value being sorted is represented by linking to an
|
|
|
|
* EquivalenceClass containing that value and including pk_opfamily among its
|
2011-03-20 01:29:08 +01:00
|
|
|
* ec_opfamilies. The EquivalenceClass tells which collation to use, too.
|
|
|
|
* This is a convenient method because it makes it trivial to detect
|
|
|
|
* equivalent and closely-related orderings. (See optimizer/README for more
|
|
|
|
* information.)
|
2007-01-20 21:45:41 +01:00
|
|
|
*
|
|
|
|
* Note: pk_strategy is either BTLessStrategyNumber (for ASC) or
|
|
|
|
* BTGreaterStrategyNumber (for DESC). We assume that all ordering-capable
|
|
|
|
* index types will use btree-compatible strategy numbers.
|
1999-08-16 04:17:58 +02:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
typedef struct PathKey
|
1996-08-28 03:59:28 +02:00
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
NodeTag type;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2022-12-02 21:20:30 +01:00
|
|
|
/* the value that is ordered */
|
|
|
|
EquivalenceClass *pk_eclass pg_node_attr(copy_as_scalar, equal_as_scalar);
|
2007-01-20 21:45:41 +01:00
|
|
|
Oid pk_opfamily; /* btree opfamily defining the ordering */
|
|
|
|
int pk_strategy; /* sort direction (ASC or DESC) */
|
|
|
|
bool pk_nulls_first; /* do NULLs come before normal values? */
|
|
|
|
} PathKey;
|
1999-02-09 04:51:42 +01:00
|
|
|
|
2021-03-29 03:47:05 +02:00
|
|
|
/*
|
|
|
|
* VolatileFunctionStatus -- allows nodes to cache their
|
|
|
|
* contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
|
|
|
|
* determined.
|
|
|
|
*/
|
|
|
|
typedef enum VolatileFunctionStatus
|
|
|
|
{
|
|
|
|
VOLATILITY_UNKNOWN = 0,
|
|
|
|
VOLATILITY_VOLATILE,
|
|
|
|
VOLATILITY_NOVOLATILE
|
|
|
|
} VolatileFunctionStatus;
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
|
2016-03-14 21:59:59 +01:00
|
|
|
/*
|
|
|
|
* PathTarget
|
|
|
|
*
|
|
|
|
* This struct contains what we need to know during planning about the
|
|
|
|
* targetlist (output columns) that a Path will compute. Each RelOptInfo
|
|
|
|
* includes a default PathTarget, which its individual Paths may simply
|
|
|
|
* reference. However, in some cases a Path may compute outputs different
|
|
|
|
* from other Paths, and in that case we make a custom PathTarget for it.
|
|
|
|
* For example, an indexscan might return index expressions that would
|
|
|
|
* otherwise need to be explicitly calculated. (Note also that "upper"
|
|
|
|
* relations generally don't have useful default PathTargets.)
|
|
|
|
*
|
|
|
|
* exprs contains bare expressions; they do not have TargetEntry nodes on top,
|
|
|
|
* though those will appear in finished Plans.
|
|
|
|
*
|
|
|
|
* sortgrouprefs[] is an array of the same length as exprs, containing the
|
|
|
|
* corresponding sort/group refnos, or zeroes for expressions not referenced
|
|
|
|
* by sort/group clauses. If sortgrouprefs is NULL (which it generally is in
|
|
|
|
* RelOptInfo.reltarget targets; only upper-level Paths contain this info),
|
|
|
|
* we have not identified sort/group columns in this tlist. This allows us to
|
|
|
|
* deal with sort/group refnos when needed with less expense than including
|
|
|
|
* TargetEntry nodes in the exprs list.
|
|
|
|
*/
|
|
|
|
typedef struct PathTarget
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2016-03-14 21:59:59 +01:00
|
|
|
NodeTag type;
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/* list of expressions to be computed */
|
|
|
|
List *exprs;
|
|
|
|
|
|
|
|
/* corresponding sort/group refnos, or 0 */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Index *sortgrouprefs pg_node_attr(array_size(exprs));
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/* cost of evaluating the expressions */
|
|
|
|
QualCost cost;
|
|
|
|
|
|
|
|
/* estimated avg width of result tuples */
|
|
|
|
int width;
|
|
|
|
|
|
|
|
/* indicates if exprs contain any volatile functions */
|
|
|
|
VolatileFunctionStatus has_volatile_expr;
|
2016-03-14 21:59:59 +01:00
|
|
|
} PathTarget;
|
|
|
|
|
2016-06-13 18:59:25 +02:00
|
|
|
/* Convenience macro to get a sort/group refno from a PathTarget */
|
|
|
|
#define get_pathtarget_sortgroupref(target, colno) \
|
|
|
|
((target)->sortgrouprefs ? (target)->sortgrouprefs[colno] : (Index) 0)
|
|
|
|
|
2016-03-14 21:59:59 +01:00
|
|
|
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
/*
|
|
|
|
* ParamPathInfo
|
|
|
|
*
|
|
|
|
* All parameterized paths for a given relation with given required outer rels
|
|
|
|
* link to a single ParamPathInfo, which stores common information such as
|
|
|
|
* the estimated rowcount for this parameterization. We do this partly to
|
|
|
|
* avoid recalculations, but mostly to ensure that the estimated rowcount
|
|
|
|
* is in fact the same for every such path.
|
|
|
|
*
|
|
|
|
* Note: ppi_clauses is only used in ParamPathInfos for base relation paths;
|
|
|
|
* in join cases it's NIL because the set of relevant clauses varies depending
|
|
|
|
* on how the join is formed. The relevant clauses will appear in each
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* parameterized join path's joinrestrictinfo list, instead. ParamPathInfos
|
|
|
|
* for append relations don't bother with this, either.
|
|
|
|
*
|
|
|
|
* ppi_serials is the set of rinfo_serial numbers for quals that are enforced
|
|
|
|
* by this path. As with ppi_clauses, it's only maintained for baserels.
|
|
|
|
* (We could construct it on-the-fly from ppi_clauses, but it seems better
|
|
|
|
* to materialize a copy.)
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
*/
|
|
|
|
typedef struct ParamPathInfo
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
NodeTag type;
|
|
|
|
|
|
|
|
Relids ppi_req_outer; /* rels supplying parameters used by path */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality ppi_rows; /* estimated number of result tuples */
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
List *ppi_clauses; /* join clauses available from outer rels */
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
Bitmapset *ppi_serials; /* set of rinfo_serial for enforced quals */
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
} ParamPathInfo;
|
|
|
|
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/*
|
2008-10-04 23:56:55 +02:00
|
|
|
* Type "Path" is used as-is for sequential-scan paths, as well as some other
|
|
|
|
* simple plan types that we don't need any extra information in the path for.
|
|
|
|
* For other path types it is the first component of a larger struct.
|
2002-11-27 21:52:04 +01:00
|
|
|
*
|
2012-01-28 01:26:38 +01:00
|
|
|
* "pathtype" is the NodeTag of the Plan node we could build from this Path.
|
|
|
|
* It is partially redundant with the Path's NodeTag, but allows us to use
|
|
|
|
* the same Path type for multiple Plan types when there is no need to
|
|
|
|
* distinguish the Plan type during path processing.
|
|
|
|
*
|
Add an explicit representation of the output targetlist to Paths.
Up to now, there's been an assumption that all Paths for a given relation
compute the same output column set (targetlist). However, there are good
reasons to remove that assumption. For example, an indexscan on an
expression index might be able to return the value of an expensive function
"for free". While we have the ability to generate such a plan today in
simple cases, we don't have a way to model that it's cheaper than a plan
that computes the function from scratch, nor a way to create such a plan
in join cases (where the function computation would normally happen at
the topmost join node). Also, we need this so that we can have Paths
representing post-scan/join steps, where the targetlist may well change
from one step to the next. Therefore, invent a "struct PathTarget"
representing the columns we expect a plan step to emit. It's convenient
to include the output tuple width and tlist evaluation cost in this struct,
and there will likely be additional fields in future.
While Path nodes that actually do have custom outputs will need their own
PathTargets, it will still be true that most Paths for a given relation
will compute the same tlist. To reduce the overhead added by this patch,
keep a "default PathTarget" in RelOptInfo, and allow Paths that compute
that column set to just point to their parent RelOptInfo's reltarget.
(In the patch as committed, actually every Path is like that, since we
do not yet have any cases of custom PathTargets.)
I took this opportunity to provide some more-honest costing of
PlaceHolderVar evaluation. Up to now, the assumption that "scan/join
reltargetlists have cost zero" was applied not only to Vars, where it's
reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval
cost of a PlaceHolderVar's expression to the first plan level where it can
be computed, by including it in the PathTarget cost field and adding that
to the cost estimates for Paths. This isn't perfect yet but it's much
better than before, and there is a way forward to improve it more. This
costing change affects the join order chosen for a couple of the regression
tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
|
|
|
* "parent" identifies the relation this Path scans, and "pathtarget"
|
|
|
|
* describes the precise set of output columns the Path would compute.
|
|
|
|
* In simple cases all Paths for a given rel share the same targetlist,
|
2016-03-14 21:59:59 +01:00
|
|
|
* which we represent by having path->pathtarget equal to parent->reltarget.
|
Add an explicit representation of the output targetlist to Paths.
Up to now, there's been an assumption that all Paths for a given relation
compute the same output column set (targetlist). However, there are good
reasons to remove that assumption. For example, an indexscan on an
expression index might be able to return the value of an expensive function
"for free". While we have the ability to generate such a plan today in
simple cases, we don't have a way to model that it's cheaper than a plan
that computes the function from scratch, nor a way to create such a plan
in join cases (where the function computation would normally happen at
the topmost join node). Also, we need this so that we can have Paths
representing post-scan/join steps, where the targetlist may well change
from one step to the next. Therefore, invent a "struct PathTarget"
representing the columns we expect a plan step to emit. It's convenient
to include the output tuple width and tlist evaluation cost in this struct,
and there will likely be additional fields in future.
While Path nodes that actually do have custom outputs will need their own
PathTargets, it will still be true that most Paths for a given relation
will compute the same tlist. To reduce the overhead added by this patch,
keep a "default PathTarget" in RelOptInfo, and allow Paths that compute
that column set to just point to their parent RelOptInfo's reltarget.
(In the patch as committed, actually every Path is like that, since we
do not yet have any cases of custom PathTargets.)
I took this opportunity to provide some more-honest costing of
PlaceHolderVar evaluation. Up to now, the assumption that "scan/join
reltargetlists have cost zero" was applied not only to Vars, where it's
reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval
cost of a PlaceHolderVar's expression to the first plan level where it can
be computed, by including it in the PathTarget cost field and adding that
to the cost estimates for Paths. This isn't perfect yet but it's much
better than before, and there is a way forward to improve it more. This
costing change affects the join order chosen for a couple of the regression
tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
|
|
|
*
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
* "param_info", if not NULL, links to a ParamPathInfo that identifies outer
|
|
|
|
* relation(s) that provide parameter values to each scan of this path.
|
|
|
|
* That means this path can only be joined to those rels by means of nestloop
|
|
|
|
* joins with this path on the inside. Also note that a parameterized path
|
|
|
|
* is responsible for testing all "movable" joinclauses involving this rel
|
|
|
|
* and the specified outer rel(s).
|
|
|
|
*
|
2012-01-28 01:26:38 +01:00
|
|
|
* "rows" is the same as parent->rows in simple paths, but in parameterized
|
|
|
|
* paths and UniquePaths it can be less than parent->rows, reflecting the
|
|
|
|
* fact that we've filtered by extra join conditions or removed duplicates.
|
|
|
|
*
|
|
|
|
* "pathkeys" is a List of PathKey nodes (see above), describing the sort
|
|
|
|
* ordering of the path's output rows.
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
*
|
|
|
|
* We do not support copying Path trees, mainly because the circular linkages
|
|
|
|
* between RelOptInfo and Path nodes can't be handled easily in a simple
|
|
|
|
* depth-first traversal. We also don't have read support at the moment.
|
1999-08-16 04:17:58 +02:00
|
|
|
*/
|
1996-08-28 03:59:28 +02:00
|
|
|
typedef struct Path
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
NodeTag type;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* tag identifying scan/join method */
|
|
|
|
NodeTag pathtype;
|
2004-01-05 06:07:36 +01:00
|
|
|
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
/*
|
|
|
|
* the relation this path can build
|
|
|
|
*
|
|
|
|
* We do NOT print the parent, else we'd be in infinite recursion. We can
|
|
|
|
* print the parent's relids for identification purposes, though.
|
|
|
|
*/
|
|
|
|
RelOptInfo *parent pg_node_attr(write_only_relids);
|
Add an explicit representation of the output targetlist to Paths.
Up to now, there's been an assumption that all Paths for a given relation
compute the same output column set (targetlist). However, there are good
reasons to remove that assumption. For example, an indexscan on an
expression index might be able to return the value of an expensive function
"for free". While we have the ability to generate such a plan today in
simple cases, we don't have a way to model that it's cheaper than a plan
that computes the function from scratch, nor a way to create such a plan
in join cases (where the function computation would normally happen at
the topmost join node). Also, we need this so that we can have Paths
representing post-scan/join steps, where the targetlist may well change
from one step to the next. Therefore, invent a "struct PathTarget"
representing the columns we expect a plan step to emit. It's convenient
to include the output tuple width and tlist evaluation cost in this struct,
and there will likely be additional fields in future.
While Path nodes that actually do have custom outputs will need their own
PathTargets, it will still be true that most Paths for a given relation
will compute the same tlist. To reduce the overhead added by this patch,
keep a "default PathTarget" in RelOptInfo, and allow Paths that compute
that column set to just point to their parent RelOptInfo's reltarget.
(In the patch as committed, actually every Path is like that, since we
do not yet have any cases of custom PathTargets.)
I took this opportunity to provide some more-honest costing of
PlaceHolderVar evaluation. Up to now, the assumption that "scan/join
reltargetlists have cost zero" was applied not only to Vars, where it's
reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval
cost of a PlaceHolderVar's expression to the first plan level where it can
be computed, by including it in the PathTarget cost field and adding that
to the cost estimates for Paths. This isn't perfect yet but it's much
better than before, and there is a way forward to improve it more. This
costing change affects the join order chosen for a couple of the regression
tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
|
|
|
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
/*
|
|
|
|
* list of Vars/Exprs, cost, width
|
|
|
|
*
|
|
|
|
* We print the pathtarget only if it's not the default one for the rel.
|
|
|
|
*/
|
|
|
|
PathTarget *pathtarget pg_node_attr(write_only_nondefault_pathtarget);
|
Add an explicit representation of the output targetlist to Paths.
Up to now, there's been an assumption that all Paths for a given relation
compute the same output column set (targetlist). However, there are good
reasons to remove that assumption. For example, an indexscan on an
expression index might be able to return the value of an expensive function
"for free". While we have the ability to generate such a plan today in
simple cases, we don't have a way to model that it's cheaper than a plan
that computes the function from scratch, nor a way to create such a plan
in join cases (where the function computation would normally happen at
the topmost join node). Also, we need this so that we can have Paths
representing post-scan/join steps, where the targetlist may well change
from one step to the next. Therefore, invent a "struct PathTarget"
representing the columns we expect a plan step to emit. It's convenient
to include the output tuple width and tlist evaluation cost in this struct,
and there will likely be additional fields in future.
While Path nodes that actually do have custom outputs will need their own
PathTargets, it will still be true that most Paths for a given relation
will compute the same tlist. To reduce the overhead added by this patch,
keep a "default PathTarget" in RelOptInfo, and allow Paths that compute
that column set to just point to their parent RelOptInfo's reltarget.
(In the patch as committed, actually every Path is like that, since we
do not yet have any cases of custom PathTargets.)
I took this opportunity to provide some more-honest costing of
PlaceHolderVar evaluation. Up to now, the assumption that "scan/join
reltargetlists have cost zero" was applied not only to Vars, where it's
reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval
cost of a PlaceHolderVar's expression to the first plan level where it can
be computed, by including it in the PathTarget cost field and adding that
to the cost estimates for Paths. This isn't perfect yet but it's much
better than before, and there is a way forward to improve it more. This
costing change affects the join order chosen for a couple of the regression
tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
|
|
|
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
/*
|
|
|
|
* parameterization info, or NULL if none
|
|
|
|
*
|
|
|
|
* We do not print the whole of param_info, since it's printed via
|
|
|
|
* RelOptInfo; it's sufficient and less cluttering to print just the
|
|
|
|
* required outer relids.
|
|
|
|
*/
|
|
|
|
ParamPathInfo *param_info pg_node_attr(write_only_req_outer);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/* engage parallel-aware logic? */
|
|
|
|
bool parallel_aware;
|
|
|
|
/* OK to use as part of parallel plan? */
|
|
|
|
bool parallel_safe;
|
|
|
|
/* desired # of workers; 0 = not parallel */
|
|
|
|
int parallel_workers;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2012-01-28 01:26:38 +01:00
|
|
|
/* estimated size/costs for path (see costsize.c for more info) */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality rows; /* estimated number of result tuples */
|
2000-02-15 21:49:31 +01:00
|
|
|
Cost startup_cost; /* cost expended before fetching any tuples */
|
|
|
|
Cost total_cost; /* total cost (assuming all tuples fetched) */
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* sort ordering of path's output; a List of PathKey nodes; see above */
|
|
|
|
List *pathkeys;
|
1996-08-28 03:59:28 +02:00
|
|
|
} Path;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
/* Macro for extracting a path's parameterization relids; beware double eval */
|
|
|
|
#define PATH_REQ_OUTER(path) \
|
|
|
|
((path)->param_info ? (path)->param_info->ppi_req_outer : (Relids) NULL)
|
|
|
|
|
1999-07-25 01:21:14 +02:00
|
|
|
/*----------
|
2005-04-25 03:30:14 +02:00
|
|
|
* IndexPath represents an index scan over a single index.
|
1999-08-16 04:17:58 +02:00
|
|
|
*
|
2011-10-11 20:20:06 +02:00
|
|
|
* This struct is used for both regular indexscans and index-only scans;
|
|
|
|
* path.pathtype is T_IndexScan or T_IndexOnlyScan to show which is meant.
|
|
|
|
*
|
2005-04-25 03:30:14 +02:00
|
|
|
* 'indexinfo' is the index to be scanned.
|
2005-04-20 00:35:18 +02:00
|
|
|
*
|
Refactor the representation of indexable clauses in IndexPaths.
In place of three separate but interrelated lists (indexclauses,
indexquals, and indexqualcols), an IndexPath now has one list
"indexclauses" of IndexClause nodes. This holds basically the same
information as before, but in a more useful format: in particular, there
is now a clear connection between an indexclause (an original restriction
clause from WHERE or JOIN/ON) and the indexquals (directly usable index
conditions) derived from it.
We also change the ground rules a bit by mandating that clause commutation,
if needed, be done up-front so that what is stored in the indexquals list
is always directly usable as an index condition. This gets rid of repeated
re-determination of which side of the clause is the indexkey during costing
and plan generation, as well as repeated lookups of the commutator
operator. To minimize the added up-front cost, the typical case of
commuting a plain OpExpr is handled by a new special-purpose function
commute_restrictinfo(). For RowCompareExprs, generating the new clause
properly commuted to begin with is not really any more complex than before,
it's just different --- and we can save doing that work twice, as the
pretty-klugy original implementation did.
Tracking the connection between original and derived clauses lets us
also track explicitly whether the derived clauses are an exact or lossy
translation of the original. This provides a cheap solution to getting
rid of unnecessary rechecks of boolean index clauses, which previously
seemed like it'd be more expensive than it was worth.
Another pleasant (IMO) side-effect is that EXPLAIN now always shows
index clauses with the indexkey on the left; this seems less confusing.
This commit leaves expand_indexqual_conditions() and some related
functions in a slightly messy state. I didn't bother to change them
any more than minimally necessary to work with the new data structure,
because all that code is going to be refactored out of existence in
a follow-on patch.
Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
|
|
|
* 'indexclauses' is a list of IndexClause nodes, each representing one
|
|
|
|
* index-checkable restriction, with implicit AND semantics across the list.
|
|
|
|
* An empty list implies a full index scan.
|
2011-12-25 01:03:21 +01:00
|
|
|
*
|
|
|
|
* 'indexorderbys', if not NIL, is a list of ORDER BY expressions that have
|
|
|
|
* been found to be usable as ordering operators for an amcanorderbyop index.
|
|
|
|
* The list must match the path's pathkeys, ie, one expression per pathkey
|
|
|
|
* in the same order. These are not RestrictInfos, just bare expressions,
|
Refactor the representation of indexable clauses in IndexPaths.
In place of three separate but interrelated lists (indexclauses,
indexquals, and indexqualcols), an IndexPath now has one list
"indexclauses" of IndexClause nodes. This holds basically the same
information as before, but in a more useful format: in particular, there
is now a clear connection between an indexclause (an original restriction
clause from WHERE or JOIN/ON) and the indexquals (directly usable index
conditions) derived from it.
We also change the ground rules a bit by mandating that clause commutation,
if needed, be done up-front so that what is stored in the indexquals list
is always directly usable as an index condition. This gets rid of repeated
re-determination of which side of the clause is the indexkey during costing
and plan generation, as well as repeated lookups of the commutator
operator. To minimize the added up-front cost, the typical case of
commuting a plain OpExpr is handled by a new special-purpose function
commute_restrictinfo(). For RowCompareExprs, generating the new clause
properly commuted to begin with is not really any more complex than before,
it's just different --- and we can save doing that work twice, as the
pretty-klugy original implementation did.
Tracking the connection between original and derived clauses lets us
also track explicitly whether the derived clauses are an exact or lossy
translation of the original. This provides a cheap solution to getting
rid of unnecessary rechecks of boolean index clauses, which previously
seemed like it'd be more expensive than it was worth.
Another pleasant (IMO) side-effect is that EXPLAIN now always shows
index clauses with the indexkey on the left; this seems less confusing.
This commit leaves expand_indexqual_conditions() and some related
functions in a slightly messy state. I didn't bother to change them
any more than minimally necessary to work with the new data structure,
because all that code is going to be refactored out of existence in
a follow-on patch.
Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
|
|
|
* since they generally won't yield booleans. It's guaranteed that each
|
|
|
|
* expression has the index key on the left side of the operator.
|
2011-12-25 01:03:21 +01:00
|
|
|
*
|
|
|
|
* 'indexorderbycols' is an integer list of index column numbers (zero-based)
|
|
|
|
* of the same length as 'indexorderbys', showing which index column each
|
|
|
|
* ORDER BY expression is meant to be used with. (There is no restriction
|
|
|
|
* on which index column each ORDER BY can be used with.)
|
2010-12-03 02:50:48 +01:00
|
|
|
*
|
2000-02-15 21:49:31 +01:00
|
|
|
* 'indexscandir' is one of:
|
2023-01-31 22:52:41 +01:00
|
|
|
* ForwardScanDirection: forward scan of an index
|
2000-02-15 21:49:31 +01:00
|
|
|
* BackwardScanDirection: backward scan of an ordered index
|
2023-01-31 22:52:41 +01:00
|
|
|
* Unordered indexes will always have an indexscandir of ForwardScanDirection.
|
2000-02-15 21:49:31 +01:00
|
|
|
*
|
2005-04-21 04:28:02 +02:00
|
|
|
* 'indextotalcost' and 'indexselectivity' are saved in the IndexPath so that
|
|
|
|
* we need not recompute them when considering using the same index in a
|
2005-04-25 03:30:14 +02:00
|
|
|
* bitmap index/heap scan (see BitmapHeapPath). The costs of the IndexPath
|
2011-10-11 20:20:06 +02:00
|
|
|
* itself represent the costs of an IndexScan or IndexOnlyScan plan type.
|
1999-07-25 01:21:14 +02:00
|
|
|
*----------
|
|
|
|
*/
|
1996-08-28 03:59:28 +02:00
|
|
|
typedef struct IndexPath
|
|
|
|
{
|
|
|
|
Path path;
|
2005-04-25 03:30:14 +02:00
|
|
|
IndexOptInfo *indexinfo;
|
2004-01-06 00:39:54 +01:00
|
|
|
List *indexclauses;
|
2010-12-03 02:50:48 +01:00
|
|
|
List *indexorderbys;
|
2011-12-25 01:03:21 +01:00
|
|
|
List *indexorderbycols;
|
2000-02-15 21:49:31 +01:00
|
|
|
ScanDirection indexscandir;
|
2005-04-21 04:28:02 +02:00
|
|
|
Cost indextotalcost;
|
|
|
|
Selectivity indexselectivity;
|
1996-08-28 03:59:28 +02:00
|
|
|
} IndexPath;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Refactor the representation of indexable clauses in IndexPaths.
In place of three separate but interrelated lists (indexclauses,
indexquals, and indexqualcols), an IndexPath now has one list
"indexclauses" of IndexClause nodes. This holds basically the same
information as before, but in a more useful format: in particular, there
is now a clear connection between an indexclause (an original restriction
clause from WHERE or JOIN/ON) and the indexquals (directly usable index
conditions) derived from it.
We also change the ground rules a bit by mandating that clause commutation,
if needed, be done up-front so that what is stored in the indexquals list
is always directly usable as an index condition. This gets rid of repeated
re-determination of which side of the clause is the indexkey during costing
and plan generation, as well as repeated lookups of the commutator
operator. To minimize the added up-front cost, the typical case of
commuting a plain OpExpr is handled by a new special-purpose function
commute_restrictinfo(). For RowCompareExprs, generating the new clause
properly commuted to begin with is not really any more complex than before,
it's just different --- and we can save doing that work twice, as the
pretty-klugy original implementation did.
Tracking the connection between original and derived clauses lets us
also track explicitly whether the derived clauses are an exact or lossy
translation of the original. This provides a cheap solution to getting
rid of unnecessary rechecks of boolean index clauses, which previously
seemed like it'd be more expensive than it was worth.
Another pleasant (IMO) side-effect is that EXPLAIN now always shows
index clauses with the indexkey on the left; this seems less confusing.
This commit leaves expand_indexqual_conditions() and some related
functions in a slightly messy state. I didn't bother to change them
any more than minimally necessary to work with the new data structure,
because all that code is going to be refactored out of existence in
a follow-on patch.
Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
|
|
|
/*
|
|
|
|
* Each IndexClause references a RestrictInfo node from the query's WHERE
|
|
|
|
* or JOIN conditions, and shows how that restriction can be applied to
|
|
|
|
* the particular index. We support both indexclauses that are directly
|
|
|
|
* usable by the index machinery, which are typically of the form
|
|
|
|
* "indexcol OP pseudoconstant", and those from which an indexable qual
|
|
|
|
* can be derived. The simplest such transformation is that a clause
|
|
|
|
* of the form "pseudoconstant OP indexcol" can be commuted to produce an
|
|
|
|
* indexable qual (the index machinery expects the indexcol to be on the
|
|
|
|
* left always). Another example is that we might be able to extract an
|
|
|
|
* indexable range condition from a LIKE condition, as in "x LIKE 'foo%bar'"
|
|
|
|
* giving rise to "x >= 'foo' AND x < 'fop'". Derivation of such lossy
|
|
|
|
* conditions is done by a planner support function attached to the
|
|
|
|
* indexclause's top-level function or operator.
|
|
|
|
*
|
2019-02-15 01:37:30 +01:00
|
|
|
* indexquals is a list of RestrictInfos for the directly-usable index
|
|
|
|
* conditions associated with this IndexClause. In the simplest case
|
|
|
|
* it's a one-element list whose member is iclause->rinfo. Otherwise,
|
|
|
|
* it contains one or more directly-usable indexqual conditions extracted
|
|
|
|
* from the given clause. The 'lossy' flag indicates whether the
|
|
|
|
* indexquals are semantically equivalent to the original clause, or
|
|
|
|
* represent a weaker condition.
|
Refactor the representation of indexable clauses in IndexPaths.
In place of three separate but interrelated lists (indexclauses,
indexquals, and indexqualcols), an IndexPath now has one list
"indexclauses" of IndexClause nodes. This holds basically the same
information as before, but in a more useful format: in particular, there
is now a clear connection between an indexclause (an original restriction
clause from WHERE or JOIN/ON) and the indexquals (directly usable index
conditions) derived from it.
We also change the ground rules a bit by mandating that clause commutation,
if needed, be done up-front so that what is stored in the indexquals list
is always directly usable as an index condition. This gets rid of repeated
re-determination of which side of the clause is the indexkey during costing
and plan generation, as well as repeated lookups of the commutator
operator. To minimize the added up-front cost, the typical case of
commuting a plain OpExpr is handled by a new special-purpose function
commute_restrictinfo(). For RowCompareExprs, generating the new clause
properly commuted to begin with is not really any more complex than before,
it's just different --- and we can save doing that work twice, as the
pretty-klugy original implementation did.
Tracking the connection between original and derived clauses lets us
also track explicitly whether the derived clauses are an exact or lossy
translation of the original. This provides a cheap solution to getting
rid of unnecessary rechecks of boolean index clauses, which previously
seemed like it'd be more expensive than it was worth.
Another pleasant (IMO) side-effect is that EXPLAIN now always shows
index clauses with the indexkey on the left; this seems less confusing.
This commit leaves expand_indexqual_conditions() and some related
functions in a slightly messy state. I didn't bother to change them
any more than minimally necessary to work with the new data structure,
because all that code is going to be refactored out of existence in
a follow-on patch.
Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
|
|
|
*
|
|
|
|
* Normally, indexcol is the index of the single index column the clause
|
|
|
|
* works on, and indexcols is NIL. But if the clause is a RowCompareExpr,
|
|
|
|
* indexcol is the index of the leading column, and indexcols is a list of
|
|
|
|
* all the affected columns. (Note that indexcols matches up with the
|
2019-02-15 01:37:30 +01:00
|
|
|
* columns of the actual indexable RowCompareExpr in indexquals, which
|
|
|
|
* might be different from the original in rinfo.)
|
Refactor the representation of indexable clauses in IndexPaths.
In place of three separate but interrelated lists (indexclauses,
indexquals, and indexqualcols), an IndexPath now has one list
"indexclauses" of IndexClause nodes. This holds basically the same
information as before, but in a more useful format: in particular, there
is now a clear connection between an indexclause (an original restriction
clause from WHERE or JOIN/ON) and the indexquals (directly usable index
conditions) derived from it.
We also change the ground rules a bit by mandating that clause commutation,
if needed, be done up-front so that what is stored in the indexquals list
is always directly usable as an index condition. This gets rid of repeated
re-determination of which side of the clause is the indexkey during costing
and plan generation, as well as repeated lookups of the commutator
operator. To minimize the added up-front cost, the typical case of
commuting a plain OpExpr is handled by a new special-purpose function
commute_restrictinfo(). For RowCompareExprs, generating the new clause
properly commuted to begin with is not really any more complex than before,
it's just different --- and we can save doing that work twice, as the
pretty-klugy original implementation did.
Tracking the connection between original and derived clauses lets us
also track explicitly whether the derived clauses are an exact or lossy
translation of the original. This provides a cheap solution to getting
rid of unnecessary rechecks of boolean index clauses, which previously
seemed like it'd be more expensive than it was worth.
Another pleasant (IMO) side-effect is that EXPLAIN now always shows
index clauses with the indexkey on the left; this seems less confusing.
This commit leaves expand_indexqual_conditions() and some related
functions in a slightly messy state. I didn't bother to change them
any more than minimally necessary to work with the new data structure,
because all that code is going to be refactored out of existence in
a follow-on patch.
Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
|
|
|
*
|
|
|
|
* An IndexPath's IndexClause list is required to be ordered by index
|
|
|
|
* column, i.e. the indexcol values must form a nondecreasing sequence.
|
|
|
|
* (The order of multiple clauses for the same index column is unspecified.)
|
|
|
|
*/
|
|
|
|
typedef struct IndexClause
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
Refactor the representation of indexable clauses in IndexPaths.
In place of three separate but interrelated lists (indexclauses,
indexquals, and indexqualcols), an IndexPath now has one list
"indexclauses" of IndexClause nodes. This holds basically the same
information as before, but in a more useful format: in particular, there
is now a clear connection between an indexclause (an original restriction
clause from WHERE or JOIN/ON) and the indexquals (directly usable index
conditions) derived from it.
We also change the ground rules a bit by mandating that clause commutation,
if needed, be done up-front so that what is stored in the indexquals list
is always directly usable as an index condition. This gets rid of repeated
re-determination of which side of the clause is the indexkey during costing
and plan generation, as well as repeated lookups of the commutator
operator. To minimize the added up-front cost, the typical case of
commuting a plain OpExpr is handled by a new special-purpose function
commute_restrictinfo(). For RowCompareExprs, generating the new clause
properly commuted to begin with is not really any more complex than before,
it's just different --- and we can save doing that work twice, as the
pretty-klugy original implementation did.
Tracking the connection between original and derived clauses lets us
also track explicitly whether the derived clauses are an exact or lossy
translation of the original. This provides a cheap solution to getting
rid of unnecessary rechecks of boolean index clauses, which previously
seemed like it'd be more expensive than it was worth.
Another pleasant (IMO) side-effect is that EXPLAIN now always shows
index clauses with the indexkey on the left; this seems less confusing.
This commit leaves expand_indexqual_conditions() and some related
functions in a slightly messy state. I didn't bother to change them
any more than minimally necessary to work with the new data structure,
because all that code is going to be refactored out of existence in
a follow-on patch.
Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
|
|
|
NodeTag type;
|
|
|
|
struct RestrictInfo *rinfo; /* original restriction or join clause */
|
2019-02-15 01:37:30 +01:00
|
|
|
List *indexquals; /* indexqual(s) derived from it */
|
Refactor the representation of indexable clauses in IndexPaths.
In place of three separate but interrelated lists (indexclauses,
indexquals, and indexqualcols), an IndexPath now has one list
"indexclauses" of IndexClause nodes. This holds basically the same
information as before, but in a more useful format: in particular, there
is now a clear connection between an indexclause (an original restriction
clause from WHERE or JOIN/ON) and the indexquals (directly usable index
conditions) derived from it.
We also change the ground rules a bit by mandating that clause commutation,
if needed, be done up-front so that what is stored in the indexquals list
is always directly usable as an index condition. This gets rid of repeated
re-determination of which side of the clause is the indexkey during costing
and plan generation, as well as repeated lookups of the commutator
operator. To minimize the added up-front cost, the typical case of
commuting a plain OpExpr is handled by a new special-purpose function
commute_restrictinfo(). For RowCompareExprs, generating the new clause
properly commuted to begin with is not really any more complex than before,
it's just different --- and we can save doing that work twice, as the
pretty-klugy original implementation did.
Tracking the connection between original and derived clauses lets us
also track explicitly whether the derived clauses are an exact or lossy
translation of the original. This provides a cheap solution to getting
rid of unnecessary rechecks of boolean index clauses, which previously
seemed like it'd be more expensive than it was worth.
Another pleasant (IMO) side-effect is that EXPLAIN now always shows
index clauses with the indexkey on the left; this seems less confusing.
This commit leaves expand_indexqual_conditions() and some related
functions in a slightly messy state. I didn't bother to change them
any more than minimally necessary to work with the new data structure,
because all that code is going to be refactored out of existence in
a follow-on patch.
Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
|
|
|
bool lossy; /* are indexquals a lossy version of clause? */
|
|
|
|
AttrNumber indexcol; /* index column the clause uses (zero-based) */
|
|
|
|
List *indexcols; /* multiple index columns, if RowCompare */
|
|
|
|
} IndexClause;
|
|
|
|
|
2005-04-20 00:35:18 +02:00
|
|
|
/*
|
|
|
|
* BitmapHeapPath represents one or more indexscans that generate TID bitmaps
|
|
|
|
* instead of directly accessing the heap, followed by AND/OR combinations
|
|
|
|
* to produce a single bitmap, followed by a heap scan that uses the bitmap.
|
|
|
|
* Note that the output is always considered unordered, since it will come
|
|
|
|
* out in physical heap order no matter what the underlying indexes did.
|
|
|
|
*
|
|
|
|
* The individual indexscans are represented by IndexPath nodes, and any
|
2005-04-25 03:30:14 +02:00
|
|
|
* logic on top of them is represented by a tree of BitmapAndPath and
|
|
|
|
* BitmapOrPath nodes. Notice that we can use the same IndexPath node both
|
2011-10-11 20:20:06 +02:00
|
|
|
* to represent a regular (or index-only) index scan plan, and as the child
|
|
|
|
* of a BitmapHeapPath that represents scanning the same index using a
|
|
|
|
* BitmapIndexScan. The startup_cost and total_cost figures of an IndexPath
|
|
|
|
* always represent the costs to use it as a regular (or index-only)
|
|
|
|
* IndexScan. The costs of a BitmapIndexScan can be computed using the
|
|
|
|
* IndexPath's indextotalcost and indexselectivity.
|
2005-04-20 00:35:18 +02:00
|
|
|
*/
|
|
|
|
typedef struct BitmapHeapPath
|
|
|
|
{
|
|
|
|
Path path;
|
2005-04-21 21:18:13 +02:00
|
|
|
Path *bitmapqual; /* IndexPath, BitmapAndPath, BitmapOrPath */
|
2005-04-20 00:35:18 +02:00
|
|
|
} BitmapHeapPath;
|
|
|
|
|
2005-04-21 21:18:13 +02:00
|
|
|
/*
|
|
|
|
* BitmapAndPath represents a BitmapAnd plan node; it can only appear as
|
|
|
|
* part of the substructure of a BitmapHeapPath. The Path structure is
|
|
|
|
* a bit more heavyweight than we really need for this, but for simplicity
|
|
|
|
* we make it a derivative of Path anyway.
|
|
|
|
*/
|
|
|
|
typedef struct BitmapAndPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
List *bitmapquals; /* IndexPaths and BitmapOrPaths */
|
|
|
|
Selectivity bitmapselectivity;
|
|
|
|
} BitmapAndPath;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* BitmapOrPath represents a BitmapOr plan node; it can only appear as
|
|
|
|
* part of the substructure of a BitmapHeapPath. The Path structure is
|
|
|
|
* a bit more heavyweight than we really need for this, but for simplicity
|
|
|
|
* we make it a derivative of Path anyway.
|
|
|
|
*/
|
|
|
|
typedef struct BitmapOrPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
List *bitmapquals; /* IndexPaths and BitmapAndPaths */
|
|
|
|
Selectivity bitmapselectivity;
|
|
|
|
} BitmapOrPath;
|
|
|
|
|
2000-11-12 01:37:02 +01:00
|
|
|
/*
|
|
|
|
* TidPath represents a scan by TID
|
2004-01-05 06:07:36 +01:00
|
|
|
*
|
2005-11-26 23:14:57 +01:00
|
|
|
* tidquals is an implicitly OR'ed list of qual expressions of the form
|
2018-12-30 21:24:28 +01:00
|
|
|
* "CTID = pseudoconstant", or "CTID = ANY(pseudoconstant_array)",
|
|
|
|
* or a CurrentOfExpr for the relation.
|
2000-11-12 01:37:02 +01:00
|
|
|
*/
|
1999-11-23 21:07:06 +01:00
|
|
|
typedef struct TidPath
|
|
|
|
{
|
|
|
|
Path path;
|
2005-11-26 23:14:57 +01:00
|
|
|
List *tidquals; /* qual(s) involving CTID = something */
|
1999-11-23 21:07:06 +01:00
|
|
|
} TidPath;
|
|
|
|
|
2021-02-27 10:59:36 +01:00
|
|
|
/*
|
2021-07-08 12:45:09 +02:00
|
|
|
* TidRangePath represents a scan by a contiguous range of TIDs
|
2021-02-27 10:59:36 +01:00
|
|
|
*
|
|
|
|
* tidrangequals is an implicitly AND'ed list of qual expressions of the form
|
|
|
|
* "CTID relop pseudoconstant", where relop is one of >,>=,<,<=.
|
|
|
|
*/
|
|
|
|
typedef struct TidRangePath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
List *tidrangequals;
|
|
|
|
} TidRangePath;
|
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
/*
|
|
|
|
* SubqueryScanPath represents a scan of an unflattened subquery-in-FROM
|
|
|
|
*
|
|
|
|
* Note that the subpath comes from a different planning domain; for example
|
|
|
|
* RTE indexes within it mean something different from those known to the
|
|
|
|
* SubqueryScanPath. path.parent->subroot is the planning context needed to
|
|
|
|
* interpret the subpath.
|
|
|
|
*/
|
|
|
|
typedef struct SubqueryScanPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing subquery execution */
|
|
|
|
} SubqueryScanPath;
|
|
|
|
|
2011-02-20 06:17:18 +01:00
|
|
|
/*
|
2016-04-21 19:30:48 +02:00
|
|
|
* ForeignPath represents a potential scan of a foreign table, foreign join
|
|
|
|
* or foreign upper-relation.
|
Revise FDW planning API, again.
Further reflection shows that a single callback isn't very workable if we
desire to let FDWs generate multiple Paths, because that forces the FDW to
do all work necessary to generate a valid Plan node for each Path. Instead
split the former PlanForeignScan API into three steps: GetForeignRelSize,
GetForeignPaths, GetForeignPlan. We had already bit the bullet of breaking
the 9.1 FDW API for 9.2, so this shouldn't cause very much additional pain,
and it's substantially more flexible for complex FDWs.
Add an fdw_private field to RelOptInfo so that the new functions can save
state there rather than possibly having to recalculate information two or
three times.
In addition, we'd not thought through what would be needed to allow an FDW
to set up subexpressions of its choice for runtime execution. We could
treat ForeignScan.fdw_private as an executable expression but that seems
likely to break existing FDWs unnecessarily (in particular, it would
restrict the set of node types allowable in fdw_private to those supported
by expression_tree_walker). Instead, invent a separate field fdw_exprs
which will receive the postprocessing appropriate for expression trees.
(One field is enough since it can be a list of expressions; also, we assume
the corresponding expression state tree(s) will be held within fdw_state,
so we don't need to add anything to ForeignScanState.)
Per review of Hanada Shigeru's pgsql_fdw patch. We may need to tweak this
further as we continue to work on that patch, but to me it feels a lot
closer to being right now.
2012-03-09 18:48:48 +01:00
|
|
|
*
|
|
|
|
* fdw_private stores FDW private data about the scan. While fdw_private is
|
|
|
|
* not actually touched by the core code during normal operations, it's
|
|
|
|
* generally a good idea to use a representation that can be dumped by
|
|
|
|
* nodeToString(), so that you can examine the structure during debugging
|
|
|
|
* with tools like pprint().
|
2011-02-20 06:17:18 +01:00
|
|
|
*/
|
|
|
|
typedef struct ForeignPath
|
|
|
|
{
|
|
|
|
Path path;
|
Allow foreign and custom joins to handle EvalPlanQual rechecks.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 provided basic
infrastructure for allowing a foreign data wrapper or custom scan
provider to replace a join of one or more tables with a scan.
However, this infrastructure failed to take into account the need
for possible EvalPlanQual rechecks, and ExecScanFetch would fail
an assertion (or just overwrite memory) if such a check was attempted
for a plan containing a pushed-down join. To fix, adjust the EPQ
machinery to skip some processing steps when scanrelid == 0, making
those the responsibility of scan's recheck method, which also has
the responsibility in this case of correctly populating the relevant
slot.
To allow foreign scans to gain control in the right place to make
use of this new facility, add a new, optional RecheckForeignScan
method. Also, allow a foreign scan to have a child plan, which can
be used to correctly populate the slot (or perhaps for something
else, but this is the only use currently envisioned).
KaiGai Kohei, reviewed by Robert Haas, Etsuro Fujita, and Kyotaro
Horiguchi.
2015-12-08 18:31:03 +01:00
|
|
|
Path *fdw_outerpath;
|
2012-03-05 22:15:59 +01:00
|
|
|
List *fdw_private;
|
2011-02-20 06:17:18 +01:00
|
|
|
} ForeignPath;
|
|
|
|
|
2014-11-07 23:26:02 +01:00
|
|
|
/*
|
2014-11-22 00:21:46 +01:00
|
|
|
* CustomPath represents a table scan done by some out-of-core extension.
|
|
|
|
*
|
|
|
|
* We provide a set of hooks here - which the provider must take care to set
|
|
|
|
* up correctly - to allow extensions to supply their own methods of scanning
|
|
|
|
* a relation. For example, a provider might provide GPU acceleration, a
|
|
|
|
* cache-based scan, or some other kind of logic we haven't dreamed up yet.
|
|
|
|
*
|
|
|
|
* CustomPaths can be injected into the planning process for a relation by
|
|
|
|
* set_rel_pathlist_hook functions.
|
|
|
|
*
|
|
|
|
* Core code must avoid assuming that the CustomPath is only as large as
|
|
|
|
* the structure declared here; providers are allowed to make it the first
|
|
|
|
* element in a larger structure. (Since the planner never copies Paths,
|
|
|
|
* this doesn't add any complication.) However, for consistency with the
|
|
|
|
* FDW case, we provide a "custom_private" field in CustomPath; providers
|
|
|
|
* may prefer to use that rather than define another struct type.
|
2014-11-07 23:26:02 +01:00
|
|
|
*/
|
|
|
|
|
2016-03-29 17:00:18 +02:00
|
|
|
struct CustomPathMethods;
|
2014-11-07 23:26:02 +01:00
|
|
|
|
2014-11-21 00:36:07 +01:00
|
|
|
typedef struct CustomPath
|
|
|
|
{
|
|
|
|
Path path;
|
2016-08-31 09:06:18 +02:00
|
|
|
uint32 flags; /* mask of CUSTOMPATH_* flags, see
|
|
|
|
* nodes/extensible.h */
|
2015-06-26 15:40:47 +02:00
|
|
|
List *custom_paths; /* list of child Path nodes, if any */
|
2014-11-22 00:21:46 +01:00
|
|
|
List *custom_private;
|
2016-03-29 17:00:18 +02:00
|
|
|
const struct CustomPathMethods *methods;
|
2014-11-21 00:36:07 +01:00
|
|
|
} CustomPath;
|
|
|
|
|
2000-11-12 01:37:02 +01:00
|
|
|
/*
|
|
|
|
* AppendPath represents an Append plan, ie, successive execution of
|
2006-01-31 22:39:25 +01:00
|
|
|
* several member plans.
|
2005-07-23 23:05:48 +02:00
|
|
|
*
|
Support Parallel Append plan nodes.
When we create an Append node, we can spread out the workers over the
subplans instead of piling on to each subplan one at a time, which
should typically be a bit more efficient, both because the startup
cost of any plan executed entirely by one worker is paid only once and
also because of reduced contention. We can also construct Append
plans using a mix of partial and non-partial subplans, which may allow
for parallelism in places that otherwise couldn't support it.
Unfortunately, this patch doesn't handle the important case of
parallelizing UNION ALL by running each branch in a separate worker;
the executor infrastructure is added here, but more planner work is
needed.
Amit Khandekar, Robert Haas, Amul Sul, reviewed and tested by
Ashutosh Bapat, Amit Langote, Rafia Sabih, Amit Kapila, and
Rajkumar Raghuwanshi.
Discussion: http://postgr.es/m/CAJ3gD9dy0K_E8r727heqXoBmWZ83HwLFwdcaSSmBQ1+S+vRuUQ@mail.gmail.com
2017-12-05 23:28:39 +01:00
|
|
|
* For partial Append, 'subpaths' contains non-partial subpaths followed by
|
|
|
|
* partial subpaths.
|
|
|
|
*
|
2005-07-23 23:05:48 +02:00
|
|
|
* Note: it is possible for "subpaths" to contain only one, or even no,
|
|
|
|
* elements. These cases are optimized during create_append_plan.
|
2008-03-24 22:53:04 +01:00
|
|
|
* In particular, an AppendPath with no subpaths is a "dummy" path that
|
|
|
|
* is created to represent the case that a relation is provably empty.
|
Fix handling of targetlist SRFs when scan/join relation is known empty.
When we introduced separate ProjectSetPath nodes for application of
set-returning functions in v10, we inadvertently broke some cases where
we're supposed to recognize that the result of a subquery is known to be
empty (contain zero rows). That's because IS_DUMMY_REL was just looking
for a childless AppendPath without allowing for a ProjectSetPath being
possibly stuck on top. In itself, this didn't do anything much worse
than produce slightly worse plans for some corner cases.
Then in v11, commit 11cf92f6e rearranged things to allow the scan/join
targetlist to be applied directly to partial paths before they get
gathered. But it inserted a short-circuit path for dummy relations
that was a little too short: it failed to insert a ProjectSetPath node
at all for a targetlist containing set-returning functions, resulting in
bogus "set-valued function called in context that cannot accept a set"
errors, as reported in bug #15669 from Madelaine Thibaut.
The best way to fix this mess seems to be to reimplement IS_DUMMY_REL
so that it drills down through any ProjectSetPath nodes that might be
there (and it seems like we'd better allow for ProjectionPath as well).
While we're at it, make it look at rel->pathlist not cheapest_total_path,
so that it gives the right answer independently of whether set_cheapest
has been done lately. That dependency looks pretty shaky in the context
of code like apply_scanjoin_target_to_paths, and even if it's not broken
today it'd certainly bite us at some point. (Nastily, unsafe use of the
old coding would almost always work; the hazard comes down to possibly
looking through a dangling pointer, and only once in a blue moon would
you find something there that resulted in the wrong answer.)
It now looks like it was a mistake for IS_DUMMY_REL to be a macro: if
there are any extensions using it, they'll continue to use the old
inadequate logic until they're recompiled, after which they'll fail
to load into server versions predating this fix. Hopefully there are
few such extensions.
Having fixed IS_DUMMY_REL, the special path for dummy rels in
apply_scanjoin_target_to_paths is unnecessary as well as being wrong,
so we can just drop it.
Also change a few places that were testing for partitioned-ness of a
planner relation but not using IS_PARTITIONED_REL for the purpose; that
seems unsafe as well as inconsistent, plus it required an ugly hack in
apply_scanjoin_target_to_paths.
In passing, save a few cycles in apply_scanjoin_target_to_paths by
skipping processing of pre-existing paths for partitioned rels,
and do some cosmetic cleanup and comment adjustment in that function.
I renamed IS_DUMMY_PATH to IS_DUMMY_APPEND with the intention of breaking
any code that might be using it, since in almost every case that would
be wrong; IS_DUMMY_REL is what to be using instead.
In HEAD, also make set_dummy_rel_pathlist static (since it's no longer
used from outside allpaths.c), and delete is_dummy_plan, since it's no
longer used anywhere.
Back-patch as appropriate into v11 and v10.
Tom Lane and Julien Rouhaud
Discussion: https://postgr.es/m/15669-02fb3296cca26203@postgresql.org
2019-03-07 20:21:52 +01:00
|
|
|
* (This is a convenient representation because it means that when we build
|
|
|
|
* an appendrel and find that all its children have been excluded, no extra
|
|
|
|
* action is needed to recognize the relation as dummy.)
|
2000-11-12 01:37:02 +01:00
|
|
|
*/
|
|
|
|
typedef struct AppendPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
List *subpaths; /* list of component Paths */
|
Use Append rather than MergeAppend for scanning ordered partitions.
If we need ordered output from a scan of a partitioned table, but
the ordering matches the partition ordering, then we don't need to
use a MergeAppend to combine the pre-ordered per-partition scan
results: a plain Append will produce the same results. This
both saves useless comparison work inside the MergeAppend proper,
and allows us to start returning tuples after istarting up just
the first child node not all of them.
However, all is not peaches and cream, because if some of the
child nodes have high startup costs then there will be big
discontinuities in the tuples-returned-versus-elapsed-time curve.
The planner's cost model cannot handle that (yet, anyway).
If we model the Append's startup cost as being just the first
child's startup cost, we may drastically underestimate the cost
of fetching slightly more tuples than are available from the first
child. Since we've had bad experiences with over-optimistic choices
of "fast start" plans for ORDER BY LIMIT queries, that seems scary.
As a klugy workaround, set the startup cost estimate for an ordered
Append to be the sum of its children's startup costs (as MergeAppend
would). This doesn't really describe reality, but it's less likely
to cause a bad plan choice than an underestimated startup cost would.
In practice, the cases where we really care about this optimization
will have child plans that are IndexScans with zero startup cost,
so that the overly conservative estimate is still just zero.
David Rowley, reviewed by Julien Rouhaud and Antonin Houska
Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
2019-04-06 01:20:30 +02:00
|
|
|
/* Index of first partial path in subpaths; list_length(subpaths) if none */
|
Support Parallel Append plan nodes.
When we create an Append node, we can spread out the workers over the
subplans instead of piling on to each subplan one at a time, which
should typically be a bit more efficient, both because the startup
cost of any plan executed entirely by one worker is paid only once and
also because of reduced contention. We can also construct Append
plans using a mix of partial and non-partial subplans, which may allow
for parallelism in places that otherwise couldn't support it.
Unfortunately, this patch doesn't handle the important case of
parallelizing UNION ALL by running each branch in a separate worker;
the executor infrastructure is added here, but more planner work is
needed.
Amit Khandekar, Robert Haas, Amul Sul, reviewed and tested by
Ashutosh Bapat, Amit Langote, Rafia Sabih, Amit Kapila, and
Rajkumar Raghuwanshi.
Discussion: http://postgr.es/m/CAJ3gD9dy0K_E8r727heqXoBmWZ83HwLFwdcaSSmBQ1+S+vRuUQ@mail.gmail.com
2017-12-05 23:28:39 +01:00
|
|
|
int first_partial_path;
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality limit_tuples; /* hard limit on output tuples, or -1 */
|
2000-11-12 01:37:02 +01:00
|
|
|
} AppendPath;
|
|
|
|
|
Fix handling of targetlist SRFs when scan/join relation is known empty.
When we introduced separate ProjectSetPath nodes for application of
set-returning functions in v10, we inadvertently broke some cases where
we're supposed to recognize that the result of a subquery is known to be
empty (contain zero rows). That's because IS_DUMMY_REL was just looking
for a childless AppendPath without allowing for a ProjectSetPath being
possibly stuck on top. In itself, this didn't do anything much worse
than produce slightly worse plans for some corner cases.
Then in v11, commit 11cf92f6e rearranged things to allow the scan/join
targetlist to be applied directly to partial paths before they get
gathered. But it inserted a short-circuit path for dummy relations
that was a little too short: it failed to insert a ProjectSetPath node
at all for a targetlist containing set-returning functions, resulting in
bogus "set-valued function called in context that cannot accept a set"
errors, as reported in bug #15669 from Madelaine Thibaut.
The best way to fix this mess seems to be to reimplement IS_DUMMY_REL
so that it drills down through any ProjectSetPath nodes that might be
there (and it seems like we'd better allow for ProjectionPath as well).
While we're at it, make it look at rel->pathlist not cheapest_total_path,
so that it gives the right answer independently of whether set_cheapest
has been done lately. That dependency looks pretty shaky in the context
of code like apply_scanjoin_target_to_paths, and even if it's not broken
today it'd certainly bite us at some point. (Nastily, unsafe use of the
old coding would almost always work; the hazard comes down to possibly
looking through a dangling pointer, and only once in a blue moon would
you find something there that resulted in the wrong answer.)
It now looks like it was a mistake for IS_DUMMY_REL to be a macro: if
there are any extensions using it, they'll continue to use the old
inadequate logic until they're recompiled, after which they'll fail
to load into server versions predating this fix. Hopefully there are
few such extensions.
Having fixed IS_DUMMY_REL, the special path for dummy rels in
apply_scanjoin_target_to_paths is unnecessary as well as being wrong,
so we can just drop it.
Also change a few places that were testing for partitioned-ness of a
planner relation but not using IS_PARTITIONED_REL for the purpose; that
seems unsafe as well as inconsistent, plus it required an ugly hack in
apply_scanjoin_target_to_paths.
In passing, save a few cycles in apply_scanjoin_target_to_paths by
skipping processing of pre-existing paths for partitioned rels,
and do some cosmetic cleanup and comment adjustment in that function.
I renamed IS_DUMMY_PATH to IS_DUMMY_APPEND with the intention of breaking
any code that might be using it, since in almost every case that would
be wrong; IS_DUMMY_REL is what to be using instead.
In HEAD, also make set_dummy_rel_pathlist static (since it's no longer
used from outside allpaths.c), and delete is_dummy_plan, since it's no
longer used anywhere.
Back-patch as appropriate into v11 and v10.
Tom Lane and Julien Rouhaud
Discussion: https://postgr.es/m/15669-02fb3296cca26203@postgresql.org
2019-03-07 20:21:52 +01:00
|
|
|
#define IS_DUMMY_APPEND(p) \
|
2008-03-24 22:53:04 +01:00
|
|
|
(IsA((p), AppendPath) && ((AppendPath *) (p))->subpaths == NIL)
|
|
|
|
|
Fix handling of targetlist SRFs when scan/join relation is known empty.
When we introduced separate ProjectSetPath nodes for application of
set-returning functions in v10, we inadvertently broke some cases where
we're supposed to recognize that the result of a subquery is known to be
empty (contain zero rows). That's because IS_DUMMY_REL was just looking
for a childless AppendPath without allowing for a ProjectSetPath being
possibly stuck on top. In itself, this didn't do anything much worse
than produce slightly worse plans for some corner cases.
Then in v11, commit 11cf92f6e rearranged things to allow the scan/join
targetlist to be applied directly to partial paths before they get
gathered. But it inserted a short-circuit path for dummy relations
that was a little too short: it failed to insert a ProjectSetPath node
at all for a targetlist containing set-returning functions, resulting in
bogus "set-valued function called in context that cannot accept a set"
errors, as reported in bug #15669 from Madelaine Thibaut.
The best way to fix this mess seems to be to reimplement IS_DUMMY_REL
so that it drills down through any ProjectSetPath nodes that might be
there (and it seems like we'd better allow for ProjectionPath as well).
While we're at it, make it look at rel->pathlist not cheapest_total_path,
so that it gives the right answer independently of whether set_cheapest
has been done lately. That dependency looks pretty shaky in the context
of code like apply_scanjoin_target_to_paths, and even if it's not broken
today it'd certainly bite us at some point. (Nastily, unsafe use of the
old coding would almost always work; the hazard comes down to possibly
looking through a dangling pointer, and only once in a blue moon would
you find something there that resulted in the wrong answer.)
It now looks like it was a mistake for IS_DUMMY_REL to be a macro: if
there are any extensions using it, they'll continue to use the old
inadequate logic until they're recompiled, after which they'll fail
to load into server versions predating this fix. Hopefully there are
few such extensions.
Having fixed IS_DUMMY_REL, the special path for dummy rels in
apply_scanjoin_target_to_paths is unnecessary as well as being wrong,
so we can just drop it.
Also change a few places that were testing for partitioned-ness of a
planner relation but not using IS_PARTITIONED_REL for the purpose; that
seems unsafe as well as inconsistent, plus it required an ugly hack in
apply_scanjoin_target_to_paths.
In passing, save a few cycles in apply_scanjoin_target_to_paths by
skipping processing of pre-existing paths for partitioned rels,
and do some cosmetic cleanup and comment adjustment in that function.
I renamed IS_DUMMY_PATH to IS_DUMMY_APPEND with the intention of breaking
any code that might be using it, since in almost every case that would
be wrong; IS_DUMMY_REL is what to be using instead.
In HEAD, also make set_dummy_rel_pathlist static (since it's no longer
used from outside allpaths.c), and delete is_dummy_plan, since it's no
longer used anywhere.
Back-patch as appropriate into v11 and v10.
Tom Lane and Julien Rouhaud
Discussion: https://postgr.es/m/15669-02fb3296cca26203@postgresql.org
2019-03-07 20:21:52 +01:00
|
|
|
/*
|
|
|
|
* A relation that's been proven empty will have one path that is dummy
|
|
|
|
* (but might have projection paths on top). For historical reasons,
|
|
|
|
* this is provided as a macro that wraps is_dummy_rel().
|
|
|
|
*/
|
|
|
|
#define IS_DUMMY_REL(r) is_dummy_rel(r)
|
|
|
|
extern bool is_dummy_rel(RelOptInfo *rel);
|
2012-01-28 01:26:38 +01:00
|
|
|
|
2010-10-14 22:56:39 +02:00
|
|
|
/*
|
|
|
|
* MergeAppendPath represents a MergeAppend plan, ie, the merging of sorted
|
|
|
|
* results from several member plans to produce similarly-sorted output.
|
|
|
|
*/
|
|
|
|
typedef struct MergeAppendPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
List *subpaths; /* list of component Paths */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality limit_tuples; /* hard limit on output tuples, or -1 */
|
2010-10-14 22:56:39 +02:00
|
|
|
} MergeAppendPath;
|
|
|
|
|
2002-11-06 01:00:45 +01:00
|
|
|
/*
|
In the planner, replace an empty FROM clause with a dummy RTE.
The fact that "SELECT expression" has no base relations has long been a
thorn in the side of the planner. It makes it hard to flatten a sub-query
that looks like that, or is a trivial VALUES() item, because the planner
generally uses relid sets to identify sub-relations, and such a sub-query
would have an empty relid set if we flattened it. prepjointree.c contains
some baroque logic that works around this in certain special cases --- but
there is a much better answer. We can replace an empty FROM clause with a
dummy RTE that acts like a table of one row and no columns, and then there
are no such corner cases to worry about. Instead we need some logic to
get rid of useless dummy RTEs, but that's simpler and covers more cases
than what was there before.
For really trivial cases, where the query is just "SELECT expression" and
nothing else, there's a hazard that adding the extra RTE makes for a
noticeable slowdown; even though it's not much processing, there's not
that much for the planner to do overall. However testing says that the
penalty is very small, close to the noise level. In more complex queries,
this is able to find optimizations that we could not find before.
The new RTE type is called RTE_RESULT, since the "scan" plan type it
gives rise to is a Result node (the same plan we produced for a "SELECT
expression" query before). To avoid confusion, rename the old ResultPath
path type to GroupResultPath, reflecting that it's only used in degenerate
grouping cases where we know the query produces just one grouped row.
(It wouldn't work to unify the two cases, because there are different
rules about where the associated quals live during query_planner.)
Note: although this touches readfuncs.c, I don't think a catversion
bump is required, because the added case can't occur in stored rules,
only plans.
Patch by me, reviewed by David Rowley and Mark Dilger
Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
|
|
|
* GroupResultPath represents use of a Result plan node to compute the
|
|
|
|
* output of a degenerate GROUP BY case, wherein we know we should produce
|
|
|
|
* exactly one row, which might then be filtered by a HAVING qual.
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
*
|
|
|
|
* Note that quals is a list of bare clauses, not RestrictInfos.
|
2002-11-06 01:00:45 +01:00
|
|
|
*/
|
In the planner, replace an empty FROM clause with a dummy RTE.
The fact that "SELECT expression" has no base relations has long been a
thorn in the side of the planner. It makes it hard to flatten a sub-query
that looks like that, or is a trivial VALUES() item, because the planner
generally uses relid sets to identify sub-relations, and such a sub-query
would have an empty relid set if we flattened it. prepjointree.c contains
some baroque logic that works around this in certain special cases --- but
there is a much better answer. We can replace an empty FROM clause with a
dummy RTE that acts like a table of one row and no columns, and then there
are no such corner cases to worry about. Instead we need some logic to
get rid of useless dummy RTEs, but that's simpler and covers more cases
than what was there before.
For really trivial cases, where the query is just "SELECT expression" and
nothing else, there's a hazard that adding the extra RTE makes for a
noticeable slowdown; even though it's not much processing, there's not
that much for the planner to do overall. However testing says that the
penalty is very small, close to the noise level. In more complex queries,
this is able to find optimizations that we could not find before.
The new RTE type is called RTE_RESULT, since the "scan" plan type it
gives rise to is a Result node (the same plan we produced for a "SELECT
expression" query before). To avoid confusion, rename the old ResultPath
path type to GroupResultPath, reflecting that it's only used in degenerate
grouping cases where we know the query produces just one grouped row.
(It wouldn't work to unify the two cases, because there are different
rules about where the associated quals live during query_planner.)
Note: although this touches readfuncs.c, I don't think a catversion
bump is required, because the added case can't occur in stored rules,
only plans.
Patch by me, reviewed by David Rowley and Mark Dilger
Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
|
|
|
typedef struct GroupResultPath
|
2002-11-06 01:00:45 +01:00
|
|
|
{
|
|
|
|
Path path;
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
List *quals;
|
In the planner, replace an empty FROM clause with a dummy RTE.
The fact that "SELECT expression" has no base relations has long been a
thorn in the side of the planner. It makes it hard to flatten a sub-query
that looks like that, or is a trivial VALUES() item, because the planner
generally uses relid sets to identify sub-relations, and such a sub-query
would have an empty relid set if we flattened it. prepjointree.c contains
some baroque logic that works around this in certain special cases --- but
there is a much better answer. We can replace an empty FROM clause with a
dummy RTE that acts like a table of one row and no columns, and then there
are no such corner cases to worry about. Instead we need some logic to
get rid of useless dummy RTEs, but that's simpler and covers more cases
than what was there before.
For really trivial cases, where the query is just "SELECT expression" and
nothing else, there's a hazard that adding the extra RTE makes for a
noticeable slowdown; even though it's not much processing, there's not
that much for the planner to do overall. However testing says that the
penalty is very small, close to the noise level. In more complex queries,
this is able to find optimizations that we could not find before.
The new RTE type is called RTE_RESULT, since the "scan" plan type it
gives rise to is a Result node (the same plan we produced for a "SELECT
expression" query before). To avoid confusion, rename the old ResultPath
path type to GroupResultPath, reflecting that it's only used in degenerate
grouping cases where we know the query produces just one grouped row.
(It wouldn't work to unify the two cases, because there are different
rules about where the associated quals live during query_planner.)
Note: although this touches readfuncs.c, I don't think a catversion
bump is required, because the added case can't occur in stored rules,
only plans.
Patch by me, reviewed by David Rowley and Mark Dilger
Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
|
|
|
} GroupResultPath;
|
2002-11-06 01:00:45 +01:00
|
|
|
|
2002-11-30 06:21:03 +01:00
|
|
|
/*
|
|
|
|
* MaterialPath represents use of a Material plan node, i.e., caching of
|
|
|
|
* the output of its subpath. This is used when the subpath is expensive
|
|
|
|
* and needs to be scanned repeatedly, or when we need mark/restore ability
|
|
|
|
* and the subpath doesn't have it.
|
|
|
|
*/
|
|
|
|
typedef struct MaterialPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath;
|
|
|
|
} MaterialPath;
|
|
|
|
|
Add Result Cache executor node (take 2)
Here we add a new executor node type named "Result Cache". The planner
can include this node type in the plan to have the executor cache the
results from the inner side of parameterized nested loop joins. This
allows caching of tuples for sets of parameters so that in the event that
the node sees the same parameter values again, it can just return the
cached tuples instead of rescanning the inner side of the join all over
again. Internally, result cache uses a hash table in order to quickly
find tuples that have been previously cached.
For certain data sets, this can significantly improve the performance of
joins. The best cases for using this new node type are for join problems
where a large portion of the tuples from the inner side of the join have
no join partner on the outer side of the join. In such cases, hash join
would have to hash values that are never looked up, thus bloating the hash
table and possibly causing it to multi-batch. Merge joins would have to
skip over all of the unmatched rows. If we use a nested loop join with a
result cache, then we only cache tuples that have at least one join
partner on the outer side of the join. The benefits of using a
parameterized nested loop with a result cache increase when there are
fewer distinct values being looked up and the number of lookups of each
value is large. Also, hash probes to lookup the cache can be much faster
than the hash probe in a hash join as it's common that the result cache's
hash table is much smaller than the hash join's due to result cache only
caching useful tuples rather than all tuples from the inner side of the
join. This variation in hash probe performance is more significant when
the hash join's hash table no longer fits into the CPU's L3 cache, but the
result cache's hash table does. The apparent "random" access of hash
buckets with each hash probe can cause a poor L3 cache hit ratio for large
hash tables. Smaller hash tables generally perform better.
The hash table used for the cache limits itself to not exceeding work_mem
* hash_mem_multiplier in size. We maintain a dlist of keys for this cache
and when we're adding new tuples and realize we've exceeded the memory
budget, we evict cache entries starting with the least recently used ones
until we have enough memory to add the new tuples to the cache.
For parameterized nested loop joins, we now consider using one of these
result cache nodes in between the nested loop node and its inner node. We
determine when this might be useful based on cost, which is primarily
driven off of what the expected cache hit ratio will be. Estimating the
cache hit ratio relies on having good distinct estimates on the nested
loop's parameters.
For now, the planner will only consider using a result cache for
parameterized nested loop joins. This works for both normal joins and
also for LATERAL type joins to subqueries. It is possible to use this new
node for other uses in the future. For example, to cache results from
correlated subqueries. However, that's not done here due to some
difficulties obtaining a distinct estimation on the outer plan to
calculate the estimated cache hit ratio. Currently we plan the inner plan
before planning the outer plan so there is no good way to know if a result
cache would be useful or not since we can't estimate the number of times
the subplan will be called until the outer plan is generated.
The functionality being added here is newly introducing a dependency on
the return value of estimate_num_groups() during the join search.
Previously, during the join search, we only ever needed to perform
selectivity estimations. With this commit, we need to use
estimate_num_groups() in order to estimate what the hit ratio on the
result cache will be. In simple terms, if we expect 10 distinct values
and we expect 1000 outer rows, then we'll estimate the hit ratio to be
99%. Since cache hits are very cheap compared to scanning the underlying
nodes on the inner side of the nested loop join, then this will
significantly reduce the planner's cost for the join. However, it's
fairly easy to see here that things will go bad when estimate_num_groups()
incorrectly returns a value that's significantly lower than the actual
number of distinct values. If this happens then that may cause us to make
use of a nested loop join with a result cache instead of some other join
type, such as a merge or hash join. Our distinct estimations have been
known to be a source of trouble in the past, so the extra reliance on them
here could cause the planner to choose slower plans than it did previous
to having this feature. Distinct estimations are also fairly hard to
estimate accurately when several tables have been joined already or when a
WHERE clause filters out a set of values that are correlated to the
expressions we're estimating the number of distinct value for.
For now, the costing we perform during query planning for result caches
does put quite a bit of faith in the distinct estimations being accurate.
When these are accurate then we should generally see faster execution
times for plans containing a result cache. However, in the real world, we
may find that we need to either change the costings to put less trust in
the distinct estimations being accurate or perhaps even disable this
feature by default. There's always an element of risk when we teach the
query planner to do new tricks that it decides to use that new trick at
the wrong time and causes a regression. Users may opt to get the old
behavior by turning the feature off using the enable_resultcache GUC.
Currently, this is enabled by default. It remains to be seen if we'll
maintain that setting for the release.
Additionally, the name "Result Cache" is the best name I could think of
for this new node at the time I started writing the patch. Nobody seems
to strongly dislike the name. A few people did suggest other names but no
other name seemed to dominate in the brief discussion that there was about
names. Let's allow the beta period to see if the current name pleases
enough people. If there's some consensus on a better name, then we can
change it before the release. Please see the 2nd discussion link below
for the discussion on the "Result Cache" name.
Author: David Rowley
Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie
Tested-By: Konstantin Knizhnik
Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com
Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
|
|
|
/*
|
2021-07-14 02:43:58 +02:00
|
|
|
* MemoizePath represents a Memoize plan node, i.e., a cache that caches
|
|
|
|
* tuples from parameterized paths to save the underlying node from having to
|
|
|
|
* be rescanned for parameter values which are already cached.
|
Add Result Cache executor node (take 2)
Here we add a new executor node type named "Result Cache". The planner
can include this node type in the plan to have the executor cache the
results from the inner side of parameterized nested loop joins. This
allows caching of tuples for sets of parameters so that in the event that
the node sees the same parameter values again, it can just return the
cached tuples instead of rescanning the inner side of the join all over
again. Internally, result cache uses a hash table in order to quickly
find tuples that have been previously cached.
For certain data sets, this can significantly improve the performance of
joins. The best cases for using this new node type are for join problems
where a large portion of the tuples from the inner side of the join have
no join partner on the outer side of the join. In such cases, hash join
would have to hash values that are never looked up, thus bloating the hash
table and possibly causing it to multi-batch. Merge joins would have to
skip over all of the unmatched rows. If we use a nested loop join with a
result cache, then we only cache tuples that have at least one join
partner on the outer side of the join. The benefits of using a
parameterized nested loop with a result cache increase when there are
fewer distinct values being looked up and the number of lookups of each
value is large. Also, hash probes to lookup the cache can be much faster
than the hash probe in a hash join as it's common that the result cache's
hash table is much smaller than the hash join's due to result cache only
caching useful tuples rather than all tuples from the inner side of the
join. This variation in hash probe performance is more significant when
the hash join's hash table no longer fits into the CPU's L3 cache, but the
result cache's hash table does. The apparent "random" access of hash
buckets with each hash probe can cause a poor L3 cache hit ratio for large
hash tables. Smaller hash tables generally perform better.
The hash table used for the cache limits itself to not exceeding work_mem
* hash_mem_multiplier in size. We maintain a dlist of keys for this cache
and when we're adding new tuples and realize we've exceeded the memory
budget, we evict cache entries starting with the least recently used ones
until we have enough memory to add the new tuples to the cache.
For parameterized nested loop joins, we now consider using one of these
result cache nodes in between the nested loop node and its inner node. We
determine when this might be useful based on cost, which is primarily
driven off of what the expected cache hit ratio will be. Estimating the
cache hit ratio relies on having good distinct estimates on the nested
loop's parameters.
For now, the planner will only consider using a result cache for
parameterized nested loop joins. This works for both normal joins and
also for LATERAL type joins to subqueries. It is possible to use this new
node for other uses in the future. For example, to cache results from
correlated subqueries. However, that's not done here due to some
difficulties obtaining a distinct estimation on the outer plan to
calculate the estimated cache hit ratio. Currently we plan the inner plan
before planning the outer plan so there is no good way to know if a result
cache would be useful or not since we can't estimate the number of times
the subplan will be called until the outer plan is generated.
The functionality being added here is newly introducing a dependency on
the return value of estimate_num_groups() during the join search.
Previously, during the join search, we only ever needed to perform
selectivity estimations. With this commit, we need to use
estimate_num_groups() in order to estimate what the hit ratio on the
result cache will be. In simple terms, if we expect 10 distinct values
and we expect 1000 outer rows, then we'll estimate the hit ratio to be
99%. Since cache hits are very cheap compared to scanning the underlying
nodes on the inner side of the nested loop join, then this will
significantly reduce the planner's cost for the join. However, it's
fairly easy to see here that things will go bad when estimate_num_groups()
incorrectly returns a value that's significantly lower than the actual
number of distinct values. If this happens then that may cause us to make
use of a nested loop join with a result cache instead of some other join
type, such as a merge or hash join. Our distinct estimations have been
known to be a source of trouble in the past, so the extra reliance on them
here could cause the planner to choose slower plans than it did previous
to having this feature. Distinct estimations are also fairly hard to
estimate accurately when several tables have been joined already or when a
WHERE clause filters out a set of values that are correlated to the
expressions we're estimating the number of distinct value for.
For now, the costing we perform during query planning for result caches
does put quite a bit of faith in the distinct estimations being accurate.
When these are accurate then we should generally see faster execution
times for plans containing a result cache. However, in the real world, we
may find that we need to either change the costings to put less trust in
the distinct estimations being accurate or perhaps even disable this
feature by default. There's always an element of risk when we teach the
query planner to do new tricks that it decides to use that new trick at
the wrong time and causes a regression. Users may opt to get the old
behavior by turning the feature off using the enable_resultcache GUC.
Currently, this is enabled by default. It remains to be seen if we'll
maintain that setting for the release.
Additionally, the name "Result Cache" is the best name I could think of
for this new node at the time I started writing the patch. Nobody seems
to strongly dislike the name. A few people did suggest other names but no
other name seemed to dominate in the brief discussion that there was about
names. Let's allow the beta period to see if the current name pleases
enough people. If there's some consensus on a better name, then we can
change it before the release. Please see the 2nd discussion link below
for the discussion on the "Result Cache" name.
Author: David Rowley
Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie
Tested-By: Konstantin Knizhnik
Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com
Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
|
|
|
*/
|
2021-07-14 02:43:58 +02:00
|
|
|
typedef struct MemoizePath
|
Add Result Cache executor node (take 2)
Here we add a new executor node type named "Result Cache". The planner
can include this node type in the plan to have the executor cache the
results from the inner side of parameterized nested loop joins. This
allows caching of tuples for sets of parameters so that in the event that
the node sees the same parameter values again, it can just return the
cached tuples instead of rescanning the inner side of the join all over
again. Internally, result cache uses a hash table in order to quickly
find tuples that have been previously cached.
For certain data sets, this can significantly improve the performance of
joins. The best cases for using this new node type are for join problems
where a large portion of the tuples from the inner side of the join have
no join partner on the outer side of the join. In such cases, hash join
would have to hash values that are never looked up, thus bloating the hash
table and possibly causing it to multi-batch. Merge joins would have to
skip over all of the unmatched rows. If we use a nested loop join with a
result cache, then we only cache tuples that have at least one join
partner on the outer side of the join. The benefits of using a
parameterized nested loop with a result cache increase when there are
fewer distinct values being looked up and the number of lookups of each
value is large. Also, hash probes to lookup the cache can be much faster
than the hash probe in a hash join as it's common that the result cache's
hash table is much smaller than the hash join's due to result cache only
caching useful tuples rather than all tuples from the inner side of the
join. This variation in hash probe performance is more significant when
the hash join's hash table no longer fits into the CPU's L3 cache, but the
result cache's hash table does. The apparent "random" access of hash
buckets with each hash probe can cause a poor L3 cache hit ratio for large
hash tables. Smaller hash tables generally perform better.
The hash table used for the cache limits itself to not exceeding work_mem
* hash_mem_multiplier in size. We maintain a dlist of keys for this cache
and when we're adding new tuples and realize we've exceeded the memory
budget, we evict cache entries starting with the least recently used ones
until we have enough memory to add the new tuples to the cache.
For parameterized nested loop joins, we now consider using one of these
result cache nodes in between the nested loop node and its inner node. We
determine when this might be useful based on cost, which is primarily
driven off of what the expected cache hit ratio will be. Estimating the
cache hit ratio relies on having good distinct estimates on the nested
loop's parameters.
For now, the planner will only consider using a result cache for
parameterized nested loop joins. This works for both normal joins and
also for LATERAL type joins to subqueries. It is possible to use this new
node for other uses in the future. For example, to cache results from
correlated subqueries. However, that's not done here due to some
difficulties obtaining a distinct estimation on the outer plan to
calculate the estimated cache hit ratio. Currently we plan the inner plan
before planning the outer plan so there is no good way to know if a result
cache would be useful or not since we can't estimate the number of times
the subplan will be called until the outer plan is generated.
The functionality being added here is newly introducing a dependency on
the return value of estimate_num_groups() during the join search.
Previously, during the join search, we only ever needed to perform
selectivity estimations. With this commit, we need to use
estimate_num_groups() in order to estimate what the hit ratio on the
result cache will be. In simple terms, if we expect 10 distinct values
and we expect 1000 outer rows, then we'll estimate the hit ratio to be
99%. Since cache hits are very cheap compared to scanning the underlying
nodes on the inner side of the nested loop join, then this will
significantly reduce the planner's cost for the join. However, it's
fairly easy to see here that things will go bad when estimate_num_groups()
incorrectly returns a value that's significantly lower than the actual
number of distinct values. If this happens then that may cause us to make
use of a nested loop join with a result cache instead of some other join
type, such as a merge or hash join. Our distinct estimations have been
known to be a source of trouble in the past, so the extra reliance on them
here could cause the planner to choose slower plans than it did previous
to having this feature. Distinct estimations are also fairly hard to
estimate accurately when several tables have been joined already or when a
WHERE clause filters out a set of values that are correlated to the
expressions we're estimating the number of distinct value for.
For now, the costing we perform during query planning for result caches
does put quite a bit of faith in the distinct estimations being accurate.
When these are accurate then we should generally see faster execution
times for plans containing a result cache. However, in the real world, we
may find that we need to either change the costings to put less trust in
the distinct estimations being accurate or perhaps even disable this
feature by default. There's always an element of risk when we teach the
query planner to do new tricks that it decides to use that new trick at
the wrong time and causes a regression. Users may opt to get the old
behavior by turning the feature off using the enable_resultcache GUC.
Currently, this is enabled by default. It remains to be seen if we'll
maintain that setting for the release.
Additionally, the name "Result Cache" is the best name I could think of
for this new node at the time I started writing the patch. Nobody seems
to strongly dislike the name. A few people did suggest other names but no
other name seemed to dominate in the brief discussion that there was about
names. Let's allow the beta period to see if the current name pleases
enough people. If there's some consensus on a better name, then we can
change it before the release. Please see the 2nd discussion link below
for the discussion on the "Result Cache" name.
Author: David Rowley
Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie
Tested-By: Konstantin Knizhnik
Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com
Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* outerpath to cache tuples from */
|
2022-12-05 18:36:40 +01:00
|
|
|
List *hash_operators; /* OIDs of hash equality ops for cache keys */
|
|
|
|
List *param_exprs; /* expressions that are cache keys */
|
Add Result Cache executor node (take 2)
Here we add a new executor node type named "Result Cache". The planner
can include this node type in the plan to have the executor cache the
results from the inner side of parameterized nested loop joins. This
allows caching of tuples for sets of parameters so that in the event that
the node sees the same parameter values again, it can just return the
cached tuples instead of rescanning the inner side of the join all over
again. Internally, result cache uses a hash table in order to quickly
find tuples that have been previously cached.
For certain data sets, this can significantly improve the performance of
joins. The best cases for using this new node type are for join problems
where a large portion of the tuples from the inner side of the join have
no join partner on the outer side of the join. In such cases, hash join
would have to hash values that are never looked up, thus bloating the hash
table and possibly causing it to multi-batch. Merge joins would have to
skip over all of the unmatched rows. If we use a nested loop join with a
result cache, then we only cache tuples that have at least one join
partner on the outer side of the join. The benefits of using a
parameterized nested loop with a result cache increase when there are
fewer distinct values being looked up and the number of lookups of each
value is large. Also, hash probes to lookup the cache can be much faster
than the hash probe in a hash join as it's common that the result cache's
hash table is much smaller than the hash join's due to result cache only
caching useful tuples rather than all tuples from the inner side of the
join. This variation in hash probe performance is more significant when
the hash join's hash table no longer fits into the CPU's L3 cache, but the
result cache's hash table does. The apparent "random" access of hash
buckets with each hash probe can cause a poor L3 cache hit ratio for large
hash tables. Smaller hash tables generally perform better.
The hash table used for the cache limits itself to not exceeding work_mem
* hash_mem_multiplier in size. We maintain a dlist of keys for this cache
and when we're adding new tuples and realize we've exceeded the memory
budget, we evict cache entries starting with the least recently used ones
until we have enough memory to add the new tuples to the cache.
For parameterized nested loop joins, we now consider using one of these
result cache nodes in between the nested loop node and its inner node. We
determine when this might be useful based on cost, which is primarily
driven off of what the expected cache hit ratio will be. Estimating the
cache hit ratio relies on having good distinct estimates on the nested
loop's parameters.
For now, the planner will only consider using a result cache for
parameterized nested loop joins. This works for both normal joins and
also for LATERAL type joins to subqueries. It is possible to use this new
node for other uses in the future. For example, to cache results from
correlated subqueries. However, that's not done here due to some
difficulties obtaining a distinct estimation on the outer plan to
calculate the estimated cache hit ratio. Currently we plan the inner plan
before planning the outer plan so there is no good way to know if a result
cache would be useful or not since we can't estimate the number of times
the subplan will be called until the outer plan is generated.
The functionality being added here is newly introducing a dependency on
the return value of estimate_num_groups() during the join search.
Previously, during the join search, we only ever needed to perform
selectivity estimations. With this commit, we need to use
estimate_num_groups() in order to estimate what the hit ratio on the
result cache will be. In simple terms, if we expect 10 distinct values
and we expect 1000 outer rows, then we'll estimate the hit ratio to be
99%. Since cache hits are very cheap compared to scanning the underlying
nodes on the inner side of the nested loop join, then this will
significantly reduce the planner's cost for the join. However, it's
fairly easy to see here that things will go bad when estimate_num_groups()
incorrectly returns a value that's significantly lower than the actual
number of distinct values. If this happens then that may cause us to make
use of a nested loop join with a result cache instead of some other join
type, such as a merge or hash join. Our distinct estimations have been
known to be a source of trouble in the past, so the extra reliance on them
here could cause the planner to choose slower plans than it did previous
to having this feature. Distinct estimations are also fairly hard to
estimate accurately when several tables have been joined already or when a
WHERE clause filters out a set of values that are correlated to the
expressions we're estimating the number of distinct value for.
For now, the costing we perform during query planning for result caches
does put quite a bit of faith in the distinct estimations being accurate.
When these are accurate then we should generally see faster execution
times for plans containing a result cache. However, in the real world, we
may find that we need to either change the costings to put less trust in
the distinct estimations being accurate or perhaps even disable this
feature by default. There's always an element of risk when we teach the
query planner to do new tricks that it decides to use that new trick at
the wrong time and causes a regression. Users may opt to get the old
behavior by turning the feature off using the enable_resultcache GUC.
Currently, this is enabled by default. It remains to be seen if we'll
maintain that setting for the release.
Additionally, the name "Result Cache" is the best name I could think of
for this new node at the time I started writing the patch. Nobody seems
to strongly dislike the name. A few people did suggest other names but no
other name seemed to dominate in the brief discussion that there was about
names. Let's allow the beta period to see if the current name pleases
enough people. If there's some consensus on a better name, then we can
change it before the release. Please see the 2nd discussion link below
for the discussion on the "Result Cache" name.
Author: David Rowley
Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie
Tested-By: Konstantin Knizhnik
Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com
Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
|
|
|
bool singlerow; /* true if the cache entry is to be marked as
|
|
|
|
* complete after caching the first record. */
|
2021-11-23 22:06:59 +01:00
|
|
|
bool binary_mode; /* true when cache key should be compared bit
|
|
|
|
* by bit, false when using hash equality ops */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality calls; /* expected number of rescans */
|
Add Result Cache executor node (take 2)
Here we add a new executor node type named "Result Cache". The planner
can include this node type in the plan to have the executor cache the
results from the inner side of parameterized nested loop joins. This
allows caching of tuples for sets of parameters so that in the event that
the node sees the same parameter values again, it can just return the
cached tuples instead of rescanning the inner side of the join all over
again. Internally, result cache uses a hash table in order to quickly
find tuples that have been previously cached.
For certain data sets, this can significantly improve the performance of
joins. The best cases for using this new node type are for join problems
where a large portion of the tuples from the inner side of the join have
no join partner on the outer side of the join. In such cases, hash join
would have to hash values that are never looked up, thus bloating the hash
table and possibly causing it to multi-batch. Merge joins would have to
skip over all of the unmatched rows. If we use a nested loop join with a
result cache, then we only cache tuples that have at least one join
partner on the outer side of the join. The benefits of using a
parameterized nested loop with a result cache increase when there are
fewer distinct values being looked up and the number of lookups of each
value is large. Also, hash probes to lookup the cache can be much faster
than the hash probe in a hash join as it's common that the result cache's
hash table is much smaller than the hash join's due to result cache only
caching useful tuples rather than all tuples from the inner side of the
join. This variation in hash probe performance is more significant when
the hash join's hash table no longer fits into the CPU's L3 cache, but the
result cache's hash table does. The apparent "random" access of hash
buckets with each hash probe can cause a poor L3 cache hit ratio for large
hash tables. Smaller hash tables generally perform better.
The hash table used for the cache limits itself to not exceeding work_mem
* hash_mem_multiplier in size. We maintain a dlist of keys for this cache
and when we're adding new tuples and realize we've exceeded the memory
budget, we evict cache entries starting with the least recently used ones
until we have enough memory to add the new tuples to the cache.
For parameterized nested loop joins, we now consider using one of these
result cache nodes in between the nested loop node and its inner node. We
determine when this might be useful based on cost, which is primarily
driven off of what the expected cache hit ratio will be. Estimating the
cache hit ratio relies on having good distinct estimates on the nested
loop's parameters.
For now, the planner will only consider using a result cache for
parameterized nested loop joins. This works for both normal joins and
also for LATERAL type joins to subqueries. It is possible to use this new
node for other uses in the future. For example, to cache results from
correlated subqueries. However, that's not done here due to some
difficulties obtaining a distinct estimation on the outer plan to
calculate the estimated cache hit ratio. Currently we plan the inner plan
before planning the outer plan so there is no good way to know if a result
cache would be useful or not since we can't estimate the number of times
the subplan will be called until the outer plan is generated.
The functionality being added here is newly introducing a dependency on
the return value of estimate_num_groups() during the join search.
Previously, during the join search, we only ever needed to perform
selectivity estimations. With this commit, we need to use
estimate_num_groups() in order to estimate what the hit ratio on the
result cache will be. In simple terms, if we expect 10 distinct values
and we expect 1000 outer rows, then we'll estimate the hit ratio to be
99%. Since cache hits are very cheap compared to scanning the underlying
nodes on the inner side of the nested loop join, then this will
significantly reduce the planner's cost for the join. However, it's
fairly easy to see here that things will go bad when estimate_num_groups()
incorrectly returns a value that's significantly lower than the actual
number of distinct values. If this happens then that may cause us to make
use of a nested loop join with a result cache instead of some other join
type, such as a merge or hash join. Our distinct estimations have been
known to be a source of trouble in the past, so the extra reliance on them
here could cause the planner to choose slower plans than it did previous
to having this feature. Distinct estimations are also fairly hard to
estimate accurately when several tables have been joined already or when a
WHERE clause filters out a set of values that are correlated to the
expressions we're estimating the number of distinct value for.
For now, the costing we perform during query planning for result caches
does put quite a bit of faith in the distinct estimations being accurate.
When these are accurate then we should generally see faster execution
times for plans containing a result cache. However, in the real world, we
may find that we need to either change the costings to put less trust in
the distinct estimations being accurate or perhaps even disable this
feature by default. There's always an element of risk when we teach the
query planner to do new tricks that it decides to use that new trick at
the wrong time and causes a regression. Users may opt to get the old
behavior by turning the feature off using the enable_resultcache GUC.
Currently, this is enabled by default. It remains to be seen if we'll
maintain that setting for the release.
Additionally, the name "Result Cache" is the best name I could think of
for this new node at the time I started writing the patch. Nobody seems
to strongly dislike the name. A few people did suggest other names but no
other name seemed to dominate in the brief discussion that there was about
names. Let's allow the beta period to see if the current name pleases
enough people. If there's some consensus on a better name, then we can
change it before the release. Please see the 2nd discussion link below
for the discussion on the "Result Cache" name.
Author: David Rowley
Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie
Tested-By: Konstantin Knizhnik
Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com
Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
|
|
|
uint32 est_entries; /* The maximum number of entries that the
|
|
|
|
* planner expects will fit in the cache, or 0
|
|
|
|
* if unknown */
|
2021-07-14 02:43:58 +02:00
|
|
|
} MemoizePath;
|
Add Result Cache executor node (take 2)
Here we add a new executor node type named "Result Cache". The planner
can include this node type in the plan to have the executor cache the
results from the inner side of parameterized nested loop joins. This
allows caching of tuples for sets of parameters so that in the event that
the node sees the same parameter values again, it can just return the
cached tuples instead of rescanning the inner side of the join all over
again. Internally, result cache uses a hash table in order to quickly
find tuples that have been previously cached.
For certain data sets, this can significantly improve the performance of
joins. The best cases for using this new node type are for join problems
where a large portion of the tuples from the inner side of the join have
no join partner on the outer side of the join. In such cases, hash join
would have to hash values that are never looked up, thus bloating the hash
table and possibly causing it to multi-batch. Merge joins would have to
skip over all of the unmatched rows. If we use a nested loop join with a
result cache, then we only cache tuples that have at least one join
partner on the outer side of the join. The benefits of using a
parameterized nested loop with a result cache increase when there are
fewer distinct values being looked up and the number of lookups of each
value is large. Also, hash probes to lookup the cache can be much faster
than the hash probe in a hash join as it's common that the result cache's
hash table is much smaller than the hash join's due to result cache only
caching useful tuples rather than all tuples from the inner side of the
join. This variation in hash probe performance is more significant when
the hash join's hash table no longer fits into the CPU's L3 cache, but the
result cache's hash table does. The apparent "random" access of hash
buckets with each hash probe can cause a poor L3 cache hit ratio for large
hash tables. Smaller hash tables generally perform better.
The hash table used for the cache limits itself to not exceeding work_mem
* hash_mem_multiplier in size. We maintain a dlist of keys for this cache
and when we're adding new tuples and realize we've exceeded the memory
budget, we evict cache entries starting with the least recently used ones
until we have enough memory to add the new tuples to the cache.
For parameterized nested loop joins, we now consider using one of these
result cache nodes in between the nested loop node and its inner node. We
determine when this might be useful based on cost, which is primarily
driven off of what the expected cache hit ratio will be. Estimating the
cache hit ratio relies on having good distinct estimates on the nested
loop's parameters.
For now, the planner will only consider using a result cache for
parameterized nested loop joins. This works for both normal joins and
also for LATERAL type joins to subqueries. It is possible to use this new
node for other uses in the future. For example, to cache results from
correlated subqueries. However, that's not done here due to some
difficulties obtaining a distinct estimation on the outer plan to
calculate the estimated cache hit ratio. Currently we plan the inner plan
before planning the outer plan so there is no good way to know if a result
cache would be useful or not since we can't estimate the number of times
the subplan will be called until the outer plan is generated.
The functionality being added here is newly introducing a dependency on
the return value of estimate_num_groups() during the join search.
Previously, during the join search, we only ever needed to perform
selectivity estimations. With this commit, we need to use
estimate_num_groups() in order to estimate what the hit ratio on the
result cache will be. In simple terms, if we expect 10 distinct values
and we expect 1000 outer rows, then we'll estimate the hit ratio to be
99%. Since cache hits are very cheap compared to scanning the underlying
nodes on the inner side of the nested loop join, then this will
significantly reduce the planner's cost for the join. However, it's
fairly easy to see here that things will go bad when estimate_num_groups()
incorrectly returns a value that's significantly lower than the actual
number of distinct values. If this happens then that may cause us to make
use of a nested loop join with a result cache instead of some other join
type, such as a merge or hash join. Our distinct estimations have been
known to be a source of trouble in the past, so the extra reliance on them
here could cause the planner to choose slower plans than it did previous
to having this feature. Distinct estimations are also fairly hard to
estimate accurately when several tables have been joined already or when a
WHERE clause filters out a set of values that are correlated to the
expressions we're estimating the number of distinct value for.
For now, the costing we perform during query planning for result caches
does put quite a bit of faith in the distinct estimations being accurate.
When these are accurate then we should generally see faster execution
times for plans containing a result cache. However, in the real world, we
may find that we need to either change the costings to put less trust in
the distinct estimations being accurate or perhaps even disable this
feature by default. There's always an element of risk when we teach the
query planner to do new tricks that it decides to use that new trick at
the wrong time and causes a regression. Users may opt to get the old
behavior by turning the feature off using the enable_resultcache GUC.
Currently, this is enabled by default. It remains to be seen if we'll
maintain that setting for the release.
Additionally, the name "Result Cache" is the best name I could think of
for this new node at the time I started writing the patch. Nobody seems
to strongly dislike the name. A few people did suggest other names but no
other name seemed to dominate in the brief discussion that there was about
names. Let's allow the beta period to see if the current name pleases
enough people. If there's some consensus on a better name, then we can
change it before the release. Please see the 2nd discussion link below
for the discussion on the "Result Cache" name.
Author: David Rowley
Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie
Tested-By: Konstantin Knizhnik
Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com
Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
|
|
|
|
2003-01-20 19:55:07 +01:00
|
|
|
/*
|
|
|
|
* UniquePath represents elimination of distinct rows from the output of
|
|
|
|
* its subpath.
|
|
|
|
*
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
* This can represent significantly different plans: either hash-based or
|
|
|
|
* sort-based implementation, or a no-op if the input path can be proven
|
|
|
|
* distinct already. The decision is sufficiently localized that it's not
|
|
|
|
* worth having separate Path node types. (Note: in the no-op case, we could
|
|
|
|
* eliminate the UniquePath node entirely and just return the subpath; but
|
|
|
|
* it's convenient to have a UniquePath in the path tree to signal upper-level
|
|
|
|
* routines that the input is known distinct.)
|
2003-01-20 19:55:07 +01:00
|
|
|
*/
|
2021-07-21 11:03:25 +02:00
|
|
|
typedef enum UniquePathMethod
|
2004-01-05 19:04:39 +01:00
|
|
|
{
|
|
|
|
UNIQUE_PATH_NOOP, /* input is known unique already */
|
|
|
|
UNIQUE_PATH_HASH, /* use hashing */
|
|
|
|
UNIQUE_PATH_SORT /* use sorting */
|
|
|
|
} UniquePathMethod;
|
|
|
|
|
2003-01-20 19:55:07 +01:00
|
|
|
typedef struct UniquePath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath;
|
2004-01-05 19:04:39 +01:00
|
|
|
UniquePathMethod umethod;
|
2008-08-14 20:48:00 +02:00
|
|
|
List *in_operators; /* equality operators of the IN clause */
|
|
|
|
List *uniq_exprs; /* expressions to be made unique */
|
2003-01-20 19:55:07 +01:00
|
|
|
} UniquePath;
|
|
|
|
|
Add a Gather executor node.
A Gather executor node runs any number of copies of a plan in an equal
number of workers and merges all of the results into a single tuple
stream. It can also run the plan itself, if the workers are
unavailable or haven't started up yet. It is intended to work with
the Partial Seq Scan node which will be added in future commits.
It could also be used to implement parallel query of a different sort
by itself, without help from Partial Seq Scan, if the single_copy mode
is used. In that mode, a worker executes the plan, and the parallel
leader does not, merely collecting the worker's results. So, a Gather
node could be inserted into a plan to split the execution of that plan
across two processes. Nested Gather nodes aren't currently supported,
but we might want to add support for that in the future.
There's nothing in the planner to actually generate Gather nodes yet,
so it's not quite time to break out the champagne. But we're getting
close.
Amit Kapila. Some designs suggestions were provided by me, and I also
reviewed the patch. Single-copy mode, documentation, and other minor
changes also by me.
2015-10-01 01:23:36 +02:00
|
|
|
/*
|
|
|
|
* GatherPath runs several copies of a plan in parallel and collects the
|
|
|
|
* results. The parallel leader may also execute the plan, unless the
|
|
|
|
* single_copy flag is set.
|
|
|
|
*/
|
|
|
|
typedef struct GatherPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path for each worker */
|
2016-09-14 21:43:26 +02:00
|
|
|
bool single_copy; /* don't execute path more than once */
|
2017-04-01 03:01:20 +02:00
|
|
|
int num_workers; /* number of workers sought to help */
|
Add a Gather executor node.
A Gather executor node runs any number of copies of a plan in an equal
number of workers and merges all of the results into a single tuple
stream. It can also run the plan itself, if the workers are
unavailable or haven't started up yet. It is intended to work with
the Partial Seq Scan node which will be added in future commits.
It could also be used to implement parallel query of a different sort
by itself, without help from Partial Seq Scan, if the single_copy mode
is used. In that mode, a worker executes the plan, and the parallel
leader does not, merely collecting the worker's results. So, a Gather
node could be inserted into a plan to split the execution of that plan
across two processes. Nested Gather nodes aren't currently supported,
but we might want to add support for that in the future.
There's nothing in the planner to actually generate Gather nodes yet,
so it's not quite time to break out the champagne. But we're getting
close.
Amit Kapila. Some designs suggestions were provided by me, and I also
reviewed the patch. Single-copy mode, documentation, and other minor
changes also by me.
2015-10-01 01:23:36 +02:00
|
|
|
} GatherPath;
|
|
|
|
|
2017-03-09 13:40:36 +01:00
|
|
|
/*
|
Force rescanning of parallel-aware scan nodes below a Gather[Merge].
The ExecReScan machinery contains various optimizations for postponing
or skipping rescans of plan subtrees; for example a HashAgg node may
conclude that it can re-use the table it built before, instead of
re-reading its input subtree. But that is wrong if the input contains
a parallel-aware table scan node, since the portion of the table scanned
by the leader process is likely to vary from one rescan to the next.
This explains the timing-dependent buildfarm failures we saw after
commit a2b70c89c.
The established mechanism for showing that a plan node's output is
potentially variable is to mark it as depending on some runtime Param.
Hence, to fix this, invent a dummy Param (one that has a PARAM_EXEC
parameter number, but carries no actual value) associated with each Gather
or GatherMerge node, mark parallel-aware nodes below that node as dependent
on that Param, and arrange for ExecReScanGather[Merge] to flag that Param
as changed whenever the Gather[Merge] node is rescanned.
This solution breaks an undocumented assumption made by the parallel
executor logic, namely that all rescans of nodes below a Gather[Merge]
will happen synchronously during the ReScan of the top node itself.
But that's fundamentally contrary to the design of the ExecReScan code,
and so was doomed to fail someday anyway (even if you want to argue
that the bug being fixed here wasn't a failure of that assumption).
A follow-on patch will address that issue. In the meantime, the worst
that's expected to happen is that given very bad timing luck, the leader
might have to do all the work during a rescan, because workers think
they have nothing to do, if they are able to start up before the eventual
ReScan of the leader's parallel-aware table scan node has reset the
shared scan state.
Although this problem exists in 9.6, there does not seem to be any way
for it to manifest there. Without GatherMerge, it seems that a plan tree
that has a rescan-short-circuiting node below Gather will always also
have one above it that will short-circuit in the same cases, preventing
the Gather from being rescanned. Hence we won't take the risk of
back-patching this change into 9.6. But v10 needs it.
Discussion: https://postgr.es/m/CAA4eK1JkByysFJNh9M349u_nNjqETuEnY_y1VUc_kJiU0bxtaQ@mail.gmail.com
2017-08-30 15:29:55 +02:00
|
|
|
* GatherMergePath runs several copies of a plan in parallel and collects
|
2019-06-24 23:17:04 +02:00
|
|
|
* the results, preserving their common sort order.
|
2017-03-09 13:40:36 +01:00
|
|
|
*/
|
|
|
|
typedef struct GatherMergePath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path for each worker */
|
|
|
|
int num_workers; /* number of workers sought to help */
|
|
|
|
} GatherMergePath;
|
|
|
|
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/*
|
|
|
|
* All join-type paths share these fields.
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef struct JoinPath
|
1996-08-28 03:59:28 +02:00
|
|
|
{
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
pg_node_attr(abstract)
|
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
Path path;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
JoinType jointype;
|
|
|
|
|
2017-04-08 04:20:03 +02:00
|
|
|
bool inner_unique; /* each outer tuple provably matches no more
|
|
|
|
* than one inner tuple */
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
Path *outerjoinpath; /* path for the outer side of the join */
|
|
|
|
Path *innerjoinpath; /* path for the inner side of the join */
|
2000-09-12 23:07:18 +02:00
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
List *joinrestrictinfo; /* RestrictInfos to apply to join */
|
2000-04-12 19:17:23 +02:00
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
* See the notes for RelOptInfo and ParamPathInfo to understand why
|
|
|
|
* joinrestrictinfo is needed in JoinPath, and can't be merged into the
|
|
|
|
* parent RelOptInfo.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
1999-08-16 04:17:58 +02:00
|
|
|
} JoinPath;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A nested-loop path needs no special fields.
|
|
|
|
*/
|
|
|
|
|
2021-08-08 16:55:51 +02:00
|
|
|
typedef struct NestPath
|
|
|
|
{
|
|
|
|
JoinPath jpath;
|
|
|
|
} NestPath;
|
1999-08-16 04:17:58 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* A mergejoin path has these fields.
|
|
|
|
*
|
2009-11-15 03:45:35 +01:00
|
|
|
* Unlike other path types, a MergePath node doesn't represent just a single
|
|
|
|
* run-time plan node: it can represent up to four. Aside from the MergeJoin
|
|
|
|
* node itself, there can be a Sort node for the outer input, a Sort node
|
|
|
|
* for the inner input, and/or a Material node for the inner input. We could
|
|
|
|
* represent these nodes by separate path nodes, but considering how many
|
|
|
|
* different merge paths are investigated during a complex join problem,
|
|
|
|
* it seems better to avoid unnecessary palloc overhead.
|
|
|
|
*
|
2000-02-19 00:47:31 +01:00
|
|
|
* path_mergeclauses lists the clauses (in the form of RestrictInfos)
|
2007-01-20 21:45:41 +01:00
|
|
|
* that will be used in the merge.
|
2000-02-19 00:47:31 +01:00
|
|
|
*
|
1999-08-16 04:17:58 +02:00
|
|
|
* Note that the mergeclauses are a subset of the parent relation's
|
|
|
|
* restriction-clause list. Any join clauses that are not mergejoinable
|
|
|
|
* appear only in the parent's restrict list, and must be checked by a
|
|
|
|
* qpqual at execution time.
|
2000-02-19 00:47:31 +01:00
|
|
|
*
|
|
|
|
* outersortkeys (resp. innersortkeys) is NIL if the outer path
|
|
|
|
* (resp. inner path) is already ordered appropriately for the
|
|
|
|
* mergejoin. If it is not NIL then it is a PathKeys list describing
|
2009-11-15 03:45:35 +01:00
|
|
|
* the ordering that must be created by an explicit Sort node.
|
|
|
|
*
|
2017-08-16 06:22:32 +02:00
|
|
|
* skip_mark_restore is true if the executor need not do mark/restore calls.
|
2017-04-08 04:20:03 +02:00
|
|
|
* Mark/restore overhead is usually required, but can be skipped if we know
|
|
|
|
* that the executor need find only one match per outer tuple, and that the
|
|
|
|
* mergeclauses are sufficient to identify a match. In such cases the
|
|
|
|
* executor can immediately advance the outer relation after processing a
|
2018-04-01 21:01:28 +02:00
|
|
|
* match, and therefore it need never back up the inner relation.
|
2017-04-08 04:20:03 +02:00
|
|
|
*
|
2017-08-16 06:22:32 +02:00
|
|
|
* materialize_inner is true if a Material node should be placed atop the
|
2009-11-15 03:45:35 +01:00
|
|
|
* inner input. This may appear with or without an inner Sort step.
|
1999-08-16 04:17:58 +02:00
|
|
|
*/
|
1999-02-12 18:25:05 +01:00
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
typedef struct MergePath
|
|
|
|
{
|
1999-02-12 18:25:05 +01:00
|
|
|
JoinPath jpath;
|
2000-02-19 00:47:31 +01:00
|
|
|
List *path_mergeclauses; /* join clauses to be used for merge */
|
2009-11-15 03:45:35 +01:00
|
|
|
List *outersortkeys; /* keys for explicit sort, if any */
|
|
|
|
List *innersortkeys; /* keys for explicit sort, if any */
|
2017-04-08 04:20:03 +02:00
|
|
|
bool skip_mark_restore; /* can executor skip mark/restore? */
|
2009-11-15 03:45:35 +01:00
|
|
|
bool materialize_inner; /* add Materialize to inner? */
|
1996-08-28 03:59:28 +02:00
|
|
|
} MergePath;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:55:44 +01:00
|
|
|
/*
|
1999-08-16 04:17:58 +02:00
|
|
|
* A hashjoin path has these fields.
|
|
|
|
*
|
|
|
|
* The remarks above for mergeclauses apply for hashclauses as well.
|
2000-02-19 00:47:31 +01:00
|
|
|
*
|
|
|
|
* Hashjoin does not care what order its inputs appear in, so we have
|
|
|
|
* no need for sortkeys.
|
1999-02-22 20:55:44 +01:00
|
|
|
*/
|
1996-08-28 03:59:28 +02:00
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
typedef struct HashPath
|
1996-08-28 03:59:28 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
JoinPath jpath;
|
|
|
|
List *path_hashclauses; /* join clauses used for hashing */
|
2009-03-26 18:15:35 +01:00
|
|
|
int num_batches; /* number of batches expected */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality inner_rows_total; /* total inner rows expected */
|
1999-08-16 04:17:58 +02:00
|
|
|
} HashPath;
|
1996-08-28 03:59:28 +02:00
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
/*
|
|
|
|
* ProjectionPath represents a projection (that is, targetlist computation)
|
|
|
|
*
|
Refactor planning of projection steps that don't need a Result plan node.
The original upper-planner-pathification design (commit 3fc6e2d7f5b652b4)
assumed that we could always determine during Path formation whether or not
we would need a Result plan node to perform projection of a targetlist.
That turns out not to work very well, though, because createplan.c still
has some responsibilities for choosing the specific target list associated
with sorting/grouping nodes (in particular it might choose to add resjunk
columns for sorting). We might not ever refactor that --- doing so would
push more work into Path formation, which isn't attractive --- and we
certainly won't do so for 9.6. So, while create_projection_path and
apply_projection_to_path can tell for sure what will happen if the subpath
is projection-capable, they can't tell for sure when it isn't. This is at
least a latent bug in apply_projection_to_path, which might think it can
apply a target to a non-projecting node when the node will end up computing
something different.
Also, I'd tied the creation of a ProjectionPath node to whether or not a
Result is needed, but it turns out that we sometimes need a ProjectionPath
node anyway to avoid modifying a possibly-shared subpath node. Callers had
to use create_projection_path for such cases, and we added code to them
that knew about the potential omission of a Result node and attempted to
adjust the cost estimates for that. That was uncertainly correct and
definitely ugly/unmaintainable.
To fix, have create_projection_path explicitly check whether a Result
is needed and adjust its cost estimate accordingly, though it creates
a ProjectionPath in either case. apply_projection_to_path is now mostly
just an optimized version that can avoid creating an extra Path node when
the input is known to not be shared with any other live path. (There
is one case that create_projection_path doesn't handle, which is pushing
parallel-safe expressions below a Gather node. We could make it do that
by duplicating the GatherPath, but there seems no need as yet.)
create_projection_plan still has to recheck the tlist-match condition,
which means that if the matching situation does get changed by createplan.c
then we'll have made a slightly incorrect cost estimate. But there seems
no help for that in the near term, and I doubt it occurs often enough,
let alone would change planning decisions often enough, to be worth
stressing about.
I added a "dummypp" field to ProjectionPath to track whether
create_projection_path thinks a Result is needed. This is not really
necessary as-committed because create_projection_plan doesn't look at the
flag; but it seems like a good idea to remember what we thought when
forming the cost estimate, if only for debugging purposes.
In passing, get rid of the target_parallel parameter added to
apply_projection_to_path by commit 54f5c5150. I don't think that's a good
idea because it involves callers in what should be an internal decision,
and opens us up to missing optimization opportunities if callers think they
don't need to provide a valid flag, as most don't. For the moment, this
just costs us an extra has_parallel_hazard call when planning a Gather.
If that starts to look expensive, I think a better solution would be to
teach PathTarget to carry/cache knowledge of parallel-safety of its
contents.
2016-06-22 00:38:20 +02:00
|
|
|
* Nominally, this path node represents using a Result plan node to do a
|
|
|
|
* projection step. However, if the input plan node supports projection,
|
|
|
|
* we can just modify its output targetlist to do the required calculations
|
|
|
|
* directly, and not need a Result. In some places in the planner we can just
|
|
|
|
* jam the desired PathTarget into the input path node (and adjust its cost
|
|
|
|
* accordingly), so we don't need a ProjectionPath. But in other places
|
|
|
|
* it's necessary to not modify the input path node, so we need a separate
|
|
|
|
* ProjectionPath node, which is marked dummy to indicate that we intend to
|
|
|
|
* assign the work to the input plan node. The estimated cost for the
|
|
|
|
* ProjectionPath node will account for whether a Result will be used or not.
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
*/
|
|
|
|
typedef struct ProjectionPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
Refactor planning of projection steps that don't need a Result plan node.
The original upper-planner-pathification design (commit 3fc6e2d7f5b652b4)
assumed that we could always determine during Path formation whether or not
we would need a Result plan node to perform projection of a targetlist.
That turns out not to work very well, though, because createplan.c still
has some responsibilities for choosing the specific target list associated
with sorting/grouping nodes (in particular it might choose to add resjunk
columns for sorting). We might not ever refactor that --- doing so would
push more work into Path formation, which isn't attractive --- and we
certainly won't do so for 9.6. So, while create_projection_path and
apply_projection_to_path can tell for sure what will happen if the subpath
is projection-capable, they can't tell for sure when it isn't. This is at
least a latent bug in apply_projection_to_path, which might think it can
apply a target to a non-projecting node when the node will end up computing
something different.
Also, I'd tied the creation of a ProjectionPath node to whether or not a
Result is needed, but it turns out that we sometimes need a ProjectionPath
node anyway to avoid modifying a possibly-shared subpath node. Callers had
to use create_projection_path for such cases, and we added code to them
that knew about the potential omission of a Result node and attempted to
adjust the cost estimates for that. That was uncertainly correct and
definitely ugly/unmaintainable.
To fix, have create_projection_path explicitly check whether a Result
is needed and adjust its cost estimate accordingly, though it creates
a ProjectionPath in either case. apply_projection_to_path is now mostly
just an optimized version that can avoid creating an extra Path node when
the input is known to not be shared with any other live path. (There
is one case that create_projection_path doesn't handle, which is pushing
parallel-safe expressions below a Gather node. We could make it do that
by duplicating the GatherPath, but there seems no need as yet.)
create_projection_plan still has to recheck the tlist-match condition,
which means that if the matching situation does get changed by createplan.c
then we'll have made a slightly incorrect cost estimate. But there seems
no help for that in the near term, and I doubt it occurs often enough,
let alone would change planning decisions often enough, to be worth
stressing about.
I added a "dummypp" field to ProjectionPath to track whether
create_projection_path thinks a Result is needed. This is not really
necessary as-committed because create_projection_plan doesn't look at the
flag; but it seems like a good idea to remember what we thought when
forming the cost estimate, if only for debugging purposes.
In passing, get rid of the target_parallel parameter added to
apply_projection_to_path by commit 54f5c5150. I don't think that's a good
idea because it involves callers in what should be an internal decision,
and opens us up to missing optimization opportunities if callers think they
don't need to provide a valid flag, as most don't. For the moment, this
just costs us an extra has_parallel_hazard call when planning a Gather.
If that starts to look expensive, I think a better solution would be to
teach PathTarget to carry/cache knowledge of parallel-safety of its
contents.
2016-06-22 00:38:20 +02:00
|
|
|
bool dummypp; /* true if no separate Result is needed */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
} ProjectionPath;
|
|
|
|
|
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs_ in the targetlist (like SELECT
generate_series(1,5)) so far was done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of return values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would allow to retain the old
"least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided to instead only return rows till
all SRFs are exhausted, returning NULL for already exhausted ones. We
deemed the previous behavior to be too confusing, unexpected and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set returning
arguments to a function, is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed,
returning multiple rows from the expression, even when the SRF containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of, now superfluous, code dealing with set return expressions
around. But as the changes to get rid of those are verbose largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
2017-01-18 21:46:50 +01:00
|
|
|
/*
|
|
|
|
* ProjectSetPath represents evaluation of a targetlist that includes
|
|
|
|
* set-returning function(s), which will need to be implemented by a
|
|
|
|
* ProjectSet plan node.
|
|
|
|
*/
|
|
|
|
typedef struct ProjectSetPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
|
|
|
} ProjectSetPath;
|
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
/*
|
|
|
|
* SortPath represents an explicit sort step
|
|
|
|
*
|
|
|
|
* The sort keys are, by definition, the same as path.pathkeys.
|
|
|
|
*
|
|
|
|
* Note: the Sort plan node cannot project, so path.pathtarget must be the
|
|
|
|
* same as the input's pathtarget.
|
|
|
|
*/
|
|
|
|
typedef struct SortPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
|
|
|
} SortPath;
|
|
|
|
|
Implement Incremental Sort
Incremental Sort is an optimized variant of multikey sort for cases when
the input is already sorted by a prefix of the requested sort keys. For
example when the relation is already sorted by (key1, key2) and we need
to sort it by (key1, key2, key3) we can simply split the input rows into
groups having equal values in (key1, key2), and only sort/compare the
remaining column key3.
This has a number of benefits:
- Reduced memory consumption, because only a single group (determined by
values in the sorted prefix) needs to be kept in memory. This may also
eliminate the need to spill to disk.
- Lower startup cost, because Incremental Sort produce results after each
prefix group, which is beneficial for plans where startup cost matters
(like for example queries with LIMIT clause).
We consider both Sort and Incremental Sort, and decide based on costing.
The implemented algorithm operates in two different modes:
- Fetching a minimum number of tuples without check of equality on the
prefix keys, and sorting on all columns when safe.
- Fetching all tuples for a single prefix group and then sorting by
comparing only the remaining (non-prefix) keys.
We always start in the first mode, and employ a heuristic to switch into
the second mode if we believe it's beneficial - the goal is to minimize
the number of unnecessary comparions while keeping memory consumption
below work_mem.
This is a very old patch series. The idea was originally proposed by
Alexander Korotkov back in 2013, and then revived in 2017. In 2018 the
patch was taken over by James Coleman, who wrote and rewrote most of the
current code.
There were many reviewers/contributors since 2013 - I've done my best to
pick the most active ones, and listed them in this commit message.
Author: James Coleman, Alexander Korotkov
Reviewed-by: Tomas Vondra, Andreas Karlsson, Marti Raudsepp, Peter Geoghegan, Robert Haas, Thomas Munro, Antonin Houska, Andres Freund, Alexander Kuzmenkov
Discussion: https://postgr.es/m/CAPpHfdscOX5an71nHd8WSUH6GNOCf=V7wgDaTXdDd9=goN-gfA@mail.gmail.com
Discussion: https://postgr.es/m/CAPpHfds1waRZ=NOmueYq0sx1ZSCnt+5QJvizT8ndT2=etZEeAQ@mail.gmail.com
2020-04-06 21:33:28 +02:00
|
|
|
/*
|
2020-11-30 22:32:56 +01:00
|
|
|
* IncrementalSortPath represents an incremental sort step
|
|
|
|
*
|
|
|
|
* This is like a regular sort, except some leading key columns are assumed
|
|
|
|
* to be ordered already.
|
Implement Incremental Sort
Incremental Sort is an optimized variant of multikey sort for cases when
the input is already sorted by a prefix of the requested sort keys. For
example when the relation is already sorted by (key1, key2) and we need
to sort it by (key1, key2, key3) we can simply split the input rows into
groups having equal values in (key1, key2), and only sort/compare the
remaining column key3.
This has a number of benefits:
- Reduced memory consumption, because only a single group (determined by
values in the sorted prefix) needs to be kept in memory. This may also
eliminate the need to spill to disk.
- Lower startup cost, because Incremental Sort produce results after each
prefix group, which is beneficial for plans where startup cost matters
(like for example queries with LIMIT clause).
We consider both Sort and Incremental Sort, and decide based on costing.
The implemented algorithm operates in two different modes:
- Fetching a minimum number of tuples without check of equality on the
prefix keys, and sorting on all columns when safe.
- Fetching all tuples for a single prefix group and then sorting by
comparing only the remaining (non-prefix) keys.
We always start in the first mode, and employ a heuristic to switch into
the second mode if we believe it's beneficial - the goal is to minimize
the number of unnecessary comparions while keeping memory consumption
below work_mem.
This is a very old patch series. The idea was originally proposed by
Alexander Korotkov back in 2013, and then revived in 2017. In 2018 the
patch was taken over by James Coleman, who wrote and rewrote most of the
current code.
There were many reviewers/contributors since 2013 - I've done my best to
pick the most active ones, and listed them in this commit message.
Author: James Coleman, Alexander Korotkov
Reviewed-by: Tomas Vondra, Andreas Karlsson, Marti Raudsepp, Peter Geoghegan, Robert Haas, Thomas Munro, Antonin Houska, Andres Freund, Alexander Kuzmenkov
Discussion: https://postgr.es/m/CAPpHfdscOX5an71nHd8WSUH6GNOCf=V7wgDaTXdDd9=goN-gfA@mail.gmail.com
Discussion: https://postgr.es/m/CAPpHfds1waRZ=NOmueYq0sx1ZSCnt+5QJvizT8ndT2=etZEeAQ@mail.gmail.com
2020-04-06 21:33:28 +02:00
|
|
|
*/
|
|
|
|
typedef struct IncrementalSortPath
|
|
|
|
{
|
|
|
|
SortPath spath;
|
|
|
|
int nPresortedCols; /* number of presorted columns */
|
|
|
|
} IncrementalSortPath;
|
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
/*
|
|
|
|
* GroupPath represents grouping (of presorted input)
|
|
|
|
*
|
|
|
|
* groupClause represents the columns to be grouped on; the input path
|
|
|
|
* must be at least that well sorted.
|
|
|
|
*
|
|
|
|
* We can also apply a qual to the grouped rows (equivalent of HAVING)
|
|
|
|
*/
|
|
|
|
typedef struct GroupPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
|
|
|
List *groupClause; /* a list of SortGroupClause's */
|
|
|
|
List *qual; /* quals (HAVING quals), if any */
|
|
|
|
} GroupPath;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* UpperUniquePath represents adjacent-duplicate removal (in presorted input)
|
|
|
|
*
|
|
|
|
* The columns to be compared are the first numkeys columns of the path's
|
|
|
|
* pathkeys. The input is presumed already sorted that way.
|
|
|
|
*/
|
|
|
|
typedef struct UpperUniquePath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
|
|
|
int numkeys; /* number of pathkey columns to compare */
|
|
|
|
} UpperUniquePath;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AggPath represents generic computation of aggregate functions
|
|
|
|
*
|
|
|
|
* This may involve plain grouping (but not grouping sets), using either
|
|
|
|
* sorted or hashed grouping; for the AGG_SORTED case, the input must be
|
|
|
|
* appropriately presorted.
|
|
|
|
*/
|
|
|
|
typedef struct AggPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
|
|
|
AggStrategy aggstrategy; /* basic strategy, see nodes.h */
|
2016-06-26 20:33:38 +02:00
|
|
|
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality numGroups; /* estimated number of groups in input */
|
2020-02-28 18:32:35 +01:00
|
|
|
uint64 transitionSpace; /* for pass-by-ref transition data */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
List *groupClause; /* a list of SortGroupClause's */
|
|
|
|
List *qual; /* quals (HAVING quals), if any */
|
|
|
|
} AggPath;
|
|
|
|
|
2017-03-27 05:20:54 +02:00
|
|
|
/*
|
|
|
|
* Various annotations used for grouping sets in the planner.
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef struct GroupingSetData
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2017-03-27 05:20:54 +02:00
|
|
|
NodeTag type;
|
|
|
|
List *set; /* grouping set as list of sortgrouprefs */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality numGroups; /* est. number of result groups */
|
2017-03-27 05:20:54 +02:00
|
|
|
} GroupingSetData;
|
|
|
|
|
|
|
|
typedef struct RollupData
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2017-03-27 05:20:54 +02:00
|
|
|
NodeTag type;
|
|
|
|
List *groupClause; /* applicable subset of parse->groupClause */
|
|
|
|
List *gsets; /* lists of integer indexes into groupClause */
|
|
|
|
List *gsets_data; /* list of GroupingSetData */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality numGroups; /* est. number of result groups */
|
2017-03-27 05:20:54 +02:00
|
|
|
bool hashable; /* can be hashed */
|
|
|
|
bool is_hashed; /* to be implemented as a hashagg */
|
|
|
|
} RollupData;
|
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
/*
|
|
|
|
* GroupingSetsPath represents a GROUPING SETS aggregation
|
|
|
|
*/
|
2017-03-27 05:20:54 +02:00
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
typedef struct GroupingSetsPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
2017-03-27 05:20:54 +02:00
|
|
|
AggStrategy aggstrategy; /* basic strategy */
|
|
|
|
List *rollups; /* list of RollupData */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
List *qual; /* quals (HAVING quals), if any */
|
2020-02-28 18:32:35 +01:00
|
|
|
uint64 transitionSpace; /* for pass-by-ref transition data */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
} GroupingSetsPath;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* MinMaxAggPath represents computation of MIN/MAX aggregates from indexes
|
|
|
|
*/
|
|
|
|
typedef struct MinMaxAggPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
List *mmaggregates; /* list of MinMaxAggInfo */
|
|
|
|
List *quals; /* HAVING quals, if any */
|
|
|
|
} MinMaxAggPath;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* WindowAggPath represents generic computation of window functions
|
|
|
|
*/
|
|
|
|
typedef struct WindowAggPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
|
|
|
WindowClause *winclause; /* WindowClause we'll be using */
|
Teach planner and executor about monotonic window funcs
Window functions such as row_number() always return a value higher than
the previously returned value for tuples in any given window partition.
Traditionally queries such as;
SELECT * FROM (
SELECT *, row_number() over (order by c) rn
FROM t
) t WHERE rn <= 10;
were executed fairly inefficiently. Neither the query planner nor the
executor knew that once rn made it to 11 that nothing further would match
the outer query's WHERE clause. It would blindly continue until all
tuples were exhausted from the subquery.
Here we implement means to make the above execute more efficiently.
This is done by way of adding a pg_proc.prosupport function to various of
the built-in window functions and adding supporting code to allow the
support function to inform the planner if the window function is
monotonically increasing, monotonically decreasing, both or neither. The
planner is then able to make use of that information and possibly allow
the executor to short-circuit execution by way of adding a "run condition"
to the WindowAgg to allow it to determine if some of its execution work
can be skipped.
This "run condition" is not like a normal filter. These run conditions
are only built using quals comparing values to monotonic window functions.
For monotonic increasing functions, quals making use of the btree
operators for <, <= and = can be used (assuming the window function column
is on the left). You can see here that once such a condition becomes false
that a monotonic increasing function could never make it subsequently true
again. For monotonically decreasing functions the >, >= and = btree
operators for the given type can be used for run conditions.
The best-case situation for this is when there is a single WindowAgg node
without a PARTITION BY clause. Here when the run condition becomes false
the WindowAgg node can simply return NULL. No more tuples will ever match
the run condition. It's a little more complex when there is a PARTITION
BY clause. In this case, we cannot return NULL as we must still process
other partitions. To speed this case up we pull tuples from the outer
plan to check if they're from the same partition and simply discard them
if they are. When we find a tuple belonging to another partition we start
processing as normal again until the run condition becomes false or we run
out of tuples to process.
When there are multiple WindowAgg nodes to evaluate then this complicates
the situation. For intermediate WindowAggs we must ensure we always
return all tuples to the calling node. Any filtering done could lead to
incorrect results in WindowAgg nodes above. For all intermediate nodes,
we can still save some work when the run condition becomes false. We've
no need to evaluate the WindowFuncs anymore. Other WindowAgg nodes cannot
reference the value of these and these tuples will not appear in the final
result anyway. The savings here are small in comparison to what can be
saved in the top-level WingowAgg, but still worthwhile.
Intermediate WindowAgg nodes never filter out tuples, but here we change
WindowAgg so that the top-level WindowAgg filters out tuples that don't
match the intermediate WindowAgg node's run condition. Such filters
appear in the "Filter" clause in EXPLAIN for the top-level WindowAgg node.
Here we add prosupport functions to allow the above to work for;
row_number(), rank(), dense_rank(), count(*) and count(expr). It appears
technically possible to do the same for min() and max(), however, it seems
unlikely to be useful enough, so that's not done here.
Bump catversion
Author: David Rowley
Reviewed-by: Andy Fan, Zhihong Yu
Discussion: https://postgr.es/m/CAApHDvqvp3At8++yF8ij06sdcoo1S_b2YoaT9D4Nf+MObzsrLQ@mail.gmail.com
2022-04-08 00:34:36 +02:00
|
|
|
List *qual; /* lower-level WindowAgg runconditions */
|
|
|
|
bool topwindow; /* false for all apart from the WindowAgg
|
|
|
|
* that's closest to the root of the plan */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
} WindowAggPath;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SetOpPath represents a set-operation, that is INTERSECT or EXCEPT
|
|
|
|
*/
|
|
|
|
typedef struct SetOpPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
|
|
|
SetOpCmd cmd; /* what to do, see nodes.h */
|
|
|
|
SetOpStrategy strategy; /* how to do it, see nodes.h */
|
|
|
|
List *distinctList; /* SortGroupClauses identifying target cols */
|
|
|
|
AttrNumber flagColIdx; /* where is the flag column, if any */
|
|
|
|
int firstFlag; /* flag value for first input relation */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality numGroups; /* estimated number of groups in input */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
} SetOpPath;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* RecursiveUnionPath represents a recursive UNION node
|
|
|
|
*/
|
|
|
|
typedef struct RecursiveUnionPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *leftpath; /* paths representing input sources */
|
|
|
|
Path *rightpath;
|
|
|
|
List *distinctList; /* SortGroupClauses identifying target cols */
|
|
|
|
int wtParam; /* ID of Param representing work table */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality numGroups; /* estimated number of groups in input */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
} RecursiveUnionPath;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* LockRowsPath represents acquiring row locks for SELECT FOR UPDATE/SHARE
|
|
|
|
*/
|
|
|
|
typedef struct LockRowsPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
|
|
|
List *rowMarks; /* a list of PlanRowMark's */
|
|
|
|
int epqParam; /* ID of Param for EvalPlanQual re-eval */
|
|
|
|
} LockRowsPath;
|
|
|
|
|
|
|
|
/*
|
2022-03-28 16:45:58 +02:00
|
|
|
* ModifyTablePath represents performing INSERT/UPDATE/DELETE/MERGE
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
*
|
|
|
|
* We represent most things that will be in the ModifyTable plan node
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
* literally, except we have a child Path not Plan. But analysis of the
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
* OnConflictExpr is deferred to createplan.c, as is collection of FDW data.
|
|
|
|
*/
|
|
|
|
typedef struct ModifyTablePath
|
|
|
|
{
|
|
|
|
Path path;
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
Path *subpath; /* Path producing source data */
|
2022-03-28 16:45:58 +02:00
|
|
|
CmdType operation; /* INSERT, UPDATE, DELETE, or MERGE */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
bool canSetTag; /* do we set the command tag/es_processed? */
|
|
|
|
Index nominalRelation; /* Parent RT index for use of EXPLAIN */
|
2018-10-07 20:33:17 +02:00
|
|
|
Index rootRelation; /* Root RT index, if target is partitioned */
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
bool partColsUpdated; /* some part key in hierarchy updated? */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
List *resultRelations; /* integer list of RT indexes */
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
List *updateColnosLists; /* per-target-table update_colnos lists */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
List *withCheckOptionLists; /* per-target-table WCO lists */
|
|
|
|
List *returningLists; /* per-target-table RETURNING tlists */
|
|
|
|
List *rowMarks; /* PlanRowMarks (non-locking only) */
|
|
|
|
OnConflictExpr *onconflict; /* ON CONFLICT clause, or NULL */
|
|
|
|
int epqParam; /* ID of Param for EvalPlanQual re-eval */
|
2022-03-28 16:45:58 +02:00
|
|
|
List *mergeActionLists; /* per-target-table lists of actions for
|
|
|
|
* MERGE */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
} ModifyTablePath;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* LimitPath represents applying LIMIT/OFFSET restrictions
|
|
|
|
*/
|
|
|
|
typedef struct LimitPath
|
|
|
|
{
|
|
|
|
Path path;
|
|
|
|
Path *subpath; /* path representing input source */
|
|
|
|
Node *limitOffset; /* OFFSET parameter, or NULL if none */
|
|
|
|
Node *limitCount; /* COUNT parameter, or NULL if none */
|
2020-04-07 22:22:13 +02:00
|
|
|
LimitOption limitOption; /* FETCH FIRST with ties or exact number */
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
} LimitPath;
|
|
|
|
|
|
|
|
|
1999-02-22 20:55:44 +01:00
|
|
|
/*
|
1999-07-25 01:21:14 +02:00
|
|
|
* Restriction clause info.
|
1999-08-16 04:17:58 +02:00
|
|
|
*
|
1999-07-25 01:21:14 +02:00
|
|
|
* We create one of these for each AND sub-clause of a restriction condition
|
2000-09-29 20:21:41 +02:00
|
|
|
* (WHERE or JOIN/ON clause). Since the restriction clauses are logically
|
|
|
|
* ANDed, we can use any one of them or any subset of them to filter out
|
|
|
|
* tuples, without having to evaluate the rest. The RestrictInfo node itself
|
|
|
|
* stores data used by the optimizer while choosing the best query plan.
|
1999-08-16 04:17:58 +02:00
|
|
|
*
|
2000-02-07 05:41:04 +01:00
|
|
|
* If a restriction clause references a single base relation, it will appear
|
|
|
|
* in the baserestrictinfo list of the RelOptInfo for that base rel.
|
1999-08-16 04:17:58 +02:00
|
|
|
*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* If a restriction clause references more than one base+OJ relation, it will
|
2005-06-09 06:19:00 +02:00
|
|
|
* appear in the joininfo list of every RelOptInfo that describes a strict
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* subset of the relations mentioned in the clause. The joininfo lists are
|
1999-08-16 04:17:58 +02:00
|
|
|
* used to drive join tree building by selecting plausible join candidates.
|
2000-02-07 05:41:04 +01:00
|
|
|
* The clause cannot actually be applied until we have built a join rel
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* containing all the relations it references, however.
|
2000-02-07 05:41:04 +01:00
|
|
|
*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* When we construct a join rel that includes all the relations referenced
|
2000-09-12 23:07:18 +02:00
|
|
|
* in a multi-relation restriction clause, we place that clause into the
|
|
|
|
* joinrestrictinfo lists of paths for the join rel, if neither left nor
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* right sub-path includes all relations referenced in the clause. The clause
|
2000-09-12 23:07:18 +02:00
|
|
|
* will be applied at that join level, and will not propagate any further up
|
|
|
|
* the join tree. (Note: the "predicate migration" code was once intended to
|
2000-02-07 05:41:04 +01:00
|
|
|
* push restriction clauses up and down the plan tree based on evaluation
|
|
|
|
* costs, but it's dead code and is unlikely to be resurrected in the
|
|
|
|
* foreseeable future.)
|
|
|
|
*
|
|
|
|
* Note that in the presence of more than two rels, a multi-rel restriction
|
|
|
|
* might reach different heights in the join tree depending on the join
|
|
|
|
* sequence we use. So, these clauses cannot be associated directly with
|
|
|
|
* the join RelOptInfo, but must be kept track of on a per-join-path basis.
|
1999-08-16 04:17:58 +02:00
|
|
|
*
|
2007-01-20 21:45:41 +01:00
|
|
|
* RestrictInfos that represent equivalence conditions (i.e., mergejoinable
|
|
|
|
* equalities that are not outerjoin-delayed) are handled a bit differently.
|
|
|
|
* Initially we attach them to the EquivalenceClasses that are derived from
|
|
|
|
* them. When we construct a scan or join path, we look through all the
|
|
|
|
* EquivalenceClasses and generate derived RestrictInfos representing the
|
|
|
|
* minimal set of conditions that need to be checked for this particular scan
|
|
|
|
* or join to enforce that all members of each EquivalenceClass are in fact
|
|
|
|
* equal in all rows emitted by the scan or join.
|
|
|
|
*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* The clause_relids field lists the base plus outer-join RT indexes that
|
|
|
|
* actually appear in the clause. required_relids lists the minimum set of
|
|
|
|
* relids needed to evaluate the clause; while this is often equal to
|
|
|
|
* clause_relids, it can be more. We will add relids to required_relids when
|
|
|
|
* we need to force an outer join ON clause to be evaluated exactly at the
|
|
|
|
* level of the outer join, which is true except when it is a "degenerate"
|
|
|
|
* condition that references only Vars from the nullable side of the join.
|
|
|
|
*
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- ie, given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
* RestrictInfo nodes contain a flag to indicate whether a qual has been
|
2000-09-29 20:21:41 +02:00
|
|
|
* pushed down to a lower level than its original syntactic placement in the
|
|
|
|
* join tree would suggest. If an outer join prevents us from pushing a qual
|
|
|
|
* down to its "natural" semantic level (the level associated with just the
|
2005-06-09 06:19:00 +02:00
|
|
|
* base rels used in the qual) then we mark the qual with a "required_relids"
|
|
|
|
* value including more than just the base rels it actually uses. By
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- ie, given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
* pretending that the qual references all the rels required to form the outer
|
2000-09-29 20:21:41 +02:00
|
|
|
* join, we prevent it from being evaluated below the outer join's joinrel.
|
|
|
|
* When we do form the outer join's joinrel, we still need to distinguish
|
|
|
|
* those quals that are actually in that join's JOIN/ON condition from those
|
2007-02-16 21:57:19 +01:00
|
|
|
* that appeared elsewhere in the tree and were pushed down to the join rel
|
2004-01-05 06:07:36 +01:00
|
|
|
* because they used no other rels. That's what the is_pushed_down flag is
|
2007-02-16 21:57:19 +01:00
|
|
|
* for; it tells us that a qual is not an OUTER JOIN qual for the set of base
|
|
|
|
* rels listed in required_relids. A clause that originally came from WHERE
|
|
|
|
* or an INNER JOIN condition will *always* have its is_pushed_down flag set.
|
|
|
|
* It's possible for an OUTER JOIN clause to be marked is_pushed_down too,
|
|
|
|
* if we decide that it can be pushed down into the nullable side of the join.
|
|
|
|
* In that case it acts as a plain filter qual for wherever it gets evaluated.
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- ie, given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
* (In short, is_pushed_down is only false for non-degenerate outer join
|
2018-04-20 21:19:16 +02:00
|
|
|
* conditions. Possibly we should rename it to reflect that meaning? But
|
|
|
|
* see also the comments for RINFO_IS_PUSHED_DOWN, below.)
|
2004-01-05 06:07:36 +01:00
|
|
|
*
|
2023-05-25 16:28:33 +02:00
|
|
|
* There is also an incompatible_relids field, which is a set of outer-join
|
|
|
|
* relids above which we cannot evaluate the clause (because they might null
|
|
|
|
* Vars it uses that should not be nulled yet). In principle this could be
|
|
|
|
* filled in any RestrictInfo as the set of OJ relids that appear above the
|
|
|
|
* clause and null Vars that it uses. In practice we only bother to populate
|
|
|
|
* it for "clone" clauses, as it's currently only needed to prevent multiple
|
|
|
|
* clones of the same clause from being accepted for evaluation at the same
|
|
|
|
* join level.
|
|
|
|
*
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
* There is also an outer_relids field, which is NULL except for outer join
|
|
|
|
* clauses; for those, it is the set of relids on the outer side of the
|
|
|
|
* clause's outer join. (These are rels that the clause cannot be applied to
|
|
|
|
* in parameterized scans, since pushing it into the join's outer side would
|
|
|
|
* lead to wrong answers.)
|
|
|
|
*
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
* To handle security-barrier conditions efficiently, we mark RestrictInfo
|
|
|
|
* nodes with a security_level field, in which higher values identify clauses
|
|
|
|
* coming from less-trusted sources. The exact semantics are that a clause
|
|
|
|
* cannot be evaluated before another clause with a lower security_level value
|
|
|
|
* unless the first clause is leakproof. As with outer-join clauses, this
|
|
|
|
* creates a reason for clauses to sometimes need to be evaluated higher in
|
|
|
|
* the join tree than their contents would suggest; and even at a single plan
|
|
|
|
* node, this rule constrains the order of application of clauses.
|
|
|
|
*
|
1999-08-16 04:17:58 +02:00
|
|
|
* In general, the referenced clause might be arbitrarily complex. The
|
|
|
|
* kinds of clauses we can handle as indexscan quals, mergejoin clauses,
|
2007-01-20 21:45:41 +01:00
|
|
|
* or hashjoin clauses are limited (e.g., no volatile functions). The code
|
|
|
|
* for each kind of path is responsible for identifying the restrict clauses
|
|
|
|
* it can use and ignoring the rest. Clauses not implemented by an indexscan,
|
2000-09-12 23:07:18 +02:00
|
|
|
* mergejoin, or hashjoin will be placed in the plan qual or joinqual field
|
2001-06-05 07:26:05 +02:00
|
|
|
* of the finished Plan node, where they will be enforced by general-purpose
|
1999-08-16 04:17:58 +02:00
|
|
|
* qual-expression-evaluation code. (But we are still entitled to count
|
|
|
|
* their selectivity when estimating the result tuple count, if we
|
|
|
|
* can guess what it is...)
|
2004-01-04 01:07:32 +01:00
|
|
|
*
|
|
|
|
* When the referenced clause is an OR clause, we generate a modified copy
|
|
|
|
* in which additional RestrictInfo nodes are inserted below the top-level
|
|
|
|
* OR/AND structure. This is a convenience for OR indexscan processing:
|
|
|
|
* indexquals taken from either the top level or an OR subclause will have
|
|
|
|
* associated RestrictInfo nodes.
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
*
|
|
|
|
* The can_join flag is set true if the clause looks potentially useful as
|
|
|
|
* a merge or hash join clause, that is if it is a binary opclause with
|
|
|
|
* nonoverlapping sets of relids referenced in the left and right sides.
|
|
|
|
* (Whether the operator is actually merge or hash joinable isn't checked,
|
|
|
|
* however.)
|
|
|
|
*
|
|
|
|
* The pseudoconstant flag is set true if the clause contains no Vars of
|
|
|
|
* the current query level and no volatile functions. Such a clause can be
|
|
|
|
* pulled out and used as a one-time qual in a gating Result node. We keep
|
|
|
|
* pseudoconstant clauses in the same lists as other RestrictInfos so that
|
|
|
|
* the regular clause-pushing machinery can assign them to the correct join
|
|
|
|
* level, but they need to be treated specially for cost and selectivity
|
|
|
|
* estimates. Note that a pseudoconstant clause can never be an indexqual
|
|
|
|
* or merge or hash join clause, so it's of no interest to large parts of
|
|
|
|
* the planner.
|
2007-01-20 21:45:41 +01:00
|
|
|
*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* When we generate multiple versions of a clause so as to have versions
|
|
|
|
* that will work after commuting some left joins per outer join identity 3,
|
|
|
|
* we mark the one with the fewest nullingrels bits with has_clone = true,
|
|
|
|
* and the rest with is_clone = true. This allows proper filtering of
|
|
|
|
* these redundant clauses, so that we apply only one version of them.
|
|
|
|
*
|
2007-01-20 21:45:41 +01:00
|
|
|
* When join clauses are generated from EquivalenceClasses, there may be
|
|
|
|
* several equally valid ways to enforce join equivalence, of which we need
|
|
|
|
* apply only one. We mark clauses of this kind by setting parent_ec to
|
|
|
|
* point to the generating EquivalenceClass. Multiple clauses with the same
|
|
|
|
* parent_ec in the same join are redundant.
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
*
|
|
|
|
* Most fields are ignored for equality, since they may not be set yet, and
|
|
|
|
* should be derivable from the clause anyway.
|
|
|
|
*
|
|
|
|
* parent_ec, left_ec, right_ec are not printed, lest it lead to infinite
|
|
|
|
* recursion in plan tree dump.
|
1999-02-22 20:55:44 +01:00
|
|
|
*/
|
1996-08-28 03:59:28 +02:00
|
|
|
|
1999-02-03 21:15:53 +01:00
|
|
|
typedef struct RestrictInfo
|
1996-08-28 03:59:28 +02:00
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
NodeTag type;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* the represented clause of WHERE or JOIN */
|
|
|
|
Expr *clause;
|
2000-09-12 23:07:18 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* true if clause was pushed down in level */
|
|
|
|
bool is_pushed_down;
|
2004-01-05 06:07:36 +01:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* see comment above */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
bool can_join pg_node_attr(equal_ignore);
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* see comment above */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
bool pseudoconstant pg_node_attr(equal_ignore);
|
2003-12-31 00:53:15 +01:00
|
|
|
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/* see comment above */
|
|
|
|
bool has_clone;
|
|
|
|
bool is_clone;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* true if known to contain no leaked Vars */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
bool leakproof pg_node_attr(equal_ignore);
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/* indicates if clause contains any volatile functions */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
VolatileFunctionStatus has_volatile pg_node_attr(equal_ignore);
|
2021-03-29 03:47:05 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* see comment above */
|
|
|
|
Index security_level;
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/* number of base rels in clause_relids */
|
|
|
|
int num_base_rels pg_node_attr(equal_ignore);
|
|
|
|
|
|
|
|
/* The relids (varnos+varnullingrels) actually referenced in the clause: */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Relids clause_relids pg_node_attr(equal_ignore);
|
2004-01-04 04:51:52 +01:00
|
|
|
|
2005-06-09 06:19:00 +02:00
|
|
|
/* The set of relids required to evaluate the clause: */
|
|
|
|
Relids required_relids;
|
|
|
|
|
2023-05-25 16:28:33 +02:00
|
|
|
/* Relids above which we cannot evaluate the clause (see comment above) */
|
|
|
|
Relids incompatible_relids;
|
|
|
|
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
/* If an outer-join clause, the outer-side relations, else NULL: */
|
|
|
|
Relids outer_relids;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* Relids in the left/right side of the clause. These fields are set for
|
|
|
|
* any binary opclause.
|
|
|
|
*/
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Relids left_relids pg_node_attr(equal_ignore);
|
|
|
|
Relids right_relids pg_node_attr(equal_ignore);
|
2003-12-31 00:53:15 +01:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* Modified clause with RestrictInfos. This field is NULL unless clause
|
|
|
|
* is an OR clause.
|
|
|
|
*/
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Expr *orclause pg_node_attr(equal_ignore);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/*----------
|
|
|
|
* Serial number of this RestrictInfo. This is unique within the current
|
|
|
|
* PlannerInfo context, with a few critical exceptions:
|
|
|
|
* 1. When we generate multiple clones of the same qual condition to
|
|
|
|
* cope with outer join identity 3, all the clones get the same serial
|
|
|
|
* number. This reflects that we only want to apply one of them in any
|
|
|
|
* given plan.
|
|
|
|
* 2. If we manufacture a commuted version of a qual to use as an index
|
|
|
|
* condition, it copies the original's rinfo_serial, since it is in
|
|
|
|
* practice the same condition.
|
|
|
|
* 3. RestrictInfos made for a child relation copy their parent's
|
|
|
|
* rinfo_serial. Likewise, when an EquivalenceClass makes a derived
|
|
|
|
* equality clause for a child relation, it copies the rinfo_serial of
|
|
|
|
* the matching equality clause for the parent. This allows detection
|
|
|
|
* of redundant pushed-down equality clauses.
|
|
|
|
*----------
|
|
|
|
*/
|
|
|
|
int rinfo_serial;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* Generating EquivalenceClass. This field is NULL unless clause is
|
|
|
|
* potentially redundant.
|
|
|
|
*/
|
2022-12-02 21:20:30 +01:00
|
|
|
EquivalenceClass *parent_ec pg_node_attr(copy_as_scalar, equal_ignore, read_write_ignore);
|
2007-01-20 21:45:41 +01:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* cache space for cost and selectivity
|
|
|
|
*/
|
2001-06-05 07:26:05 +02:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* eval cost of clause; -1 if not yet set */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
QualCost eval_cost pg_node_attr(equal_ignore);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Do assorted mop-up in the planner.
Remove RestrictInfo.nullable_relids, along with a good deal of
infrastructure that calculated it. One use-case for it was in
join_clause_is_movable_to, but we can now replace that usage with
a check to see if the clause's relids include any outer join
that can null the target relation. The other use-case was in
join_clause_is_movable_into, but that test can just be dropped
entirely now that the clause's relids include outer joins.
Furthermore, join_clause_is_movable_into should now be
accurate enough that it will accept anything returned by
generate_join_implied_equalities, so we can restore the Assert
that was diked out in commit 95f4e59c3.
Remove the outerjoin_delayed mechanism. We needed this before to
prevent quals from getting evaluated below outer joins that should
null some of their vars. Now that we consider varnullingrels while
placing quals, that's taken care of automatically, so throw the
whole thing away.
Teach remove_useless_result_rtes to also remove useless FromExprs.
Having done that, the delay_upper_joins flag serves no purpose any
more and we can remove it, largely reverting 11086f2f2.
Use constant TRUE for "dummy" clauses when throwing back outer joins.
This improves on a hack I introduced in commit 6a6522529. If we
have a left-join clause l.x = r.y, and a WHERE clause l.x = constant,
we generate r.y = constant and then don't really have a need for the
join clause. But we must throw the join clause back anyway after
marking it redundant, so that the join search heuristics won't think
this is a clauseless join and avoid it. That was a kluge introduced
under time pressure, and after looking at it I thought of a better
way: let's just introduce constant-TRUE "join clauses" instead,
and get rid of them at the end. This improves the generated plans for
such cases by not having to test a redundant join clause. We can also
get rid of the ugly hack used to mark such clauses as redundant for
selectivity estimation.
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
|
|
|
/* selectivity for "normal" (JOIN_INNER) semantics; -1 if not yet set */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Selectivity norm_selec pg_node_attr(equal_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* selectivity for outer join semantics; -1 if not yet set */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Selectivity outer_selec pg_node_attr(equal_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* opfamilies containing clause operator; valid if clause is
|
|
|
|
* mergejoinable, else NIL
|
|
|
|
*/
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
List *mergeopfamilies pg_node_attr(equal_ignore);
|
2000-12-14 23:30:45 +01:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* cache space for mergeclause processing; NULL if not yet set
|
|
|
|
*/
|
2002-03-01 07:01:20 +01:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* EquivalenceClass containing lefthand */
|
2022-12-02 21:20:30 +01:00
|
|
|
EquivalenceClass *left_ec pg_node_attr(copy_as_scalar, equal_ignore, read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* EquivalenceClass containing righthand */
|
2022-12-02 21:20:30 +01:00
|
|
|
EquivalenceClass *right_ec pg_node_attr(copy_as_scalar, equal_ignore, read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* EquivalenceMember for lefthand */
|
2022-12-02 21:20:30 +01:00
|
|
|
EquivalenceMember *left_em pg_node_attr(copy_as_scalar, equal_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* EquivalenceMember for righthand */
|
2022-12-02 21:20:30 +01:00
|
|
|
EquivalenceMember *right_em pg_node_attr(copy_as_scalar, equal_ignore);
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* List of MergeScanSelCache structs. Those aren't Nodes, so hard to
|
|
|
|
* copy; instead replace with NIL. That has the effect that copying will
|
|
|
|
* just reset the cache. Likewise, can't compare or print them.
|
|
|
|
*/
|
|
|
|
List *scansel_cache pg_node_attr(copy_as(NIL), equal_ignore, read_write_ignore);
|
2000-12-14 23:30:45 +01:00
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/*
|
|
|
|
* transient workspace for use while considering a specific join path; T =
|
|
|
|
* outer var on left, F = on right
|
|
|
|
*/
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
bool outer_is_left pg_node_attr(equal_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* copy of clause operator; valid if clause is hashjoinable, else
|
|
|
|
* InvalidOid
|
|
|
|
*/
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Oid hashjoinoperator pg_node_attr(equal_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* cache space for hashclause processing; -1 if not yet set
|
|
|
|
*/
|
|
|
|
/* avg bucketsize of left side */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Selectivity left_bucketsize pg_node_attr(equal_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* avg bucketsize of right side */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Selectivity right_bucketsize pg_node_attr(equal_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* left side's most common val's freq */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Selectivity left_mcvfreq pg_node_attr(equal_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
/* right side's most common val's freq */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Selectivity right_mcvfreq pg_node_attr(equal_ignore);
|
Add Result Cache executor node (take 2)
Here we add a new executor node type named "Result Cache". The planner
can include this node type in the plan to have the executor cache the
results from the inner side of parameterized nested loop joins. This
allows caching of tuples for sets of parameters so that in the event that
the node sees the same parameter values again, it can just return the
cached tuples instead of rescanning the inner side of the join all over
again. Internally, result cache uses a hash table in order to quickly
find tuples that have been previously cached.
For certain data sets, this can significantly improve the performance of
joins. The best cases for using this new node type are for join problems
where a large portion of the tuples from the inner side of the join have
no join partner on the outer side of the join. In such cases, hash join
would have to hash values that are never looked up, thus bloating the hash
table and possibly causing it to multi-batch. Merge joins would have to
skip over all of the unmatched rows. If we use a nested loop join with a
result cache, then we only cache tuples that have at least one join
partner on the outer side of the join. The benefits of using a
parameterized nested loop with a result cache increase when there are
fewer distinct values being looked up and the number of lookups of each
value is large. Also, hash probes to lookup the cache can be much faster
than the hash probe in a hash join as it's common that the result cache's
hash table is much smaller than the hash join's due to result cache only
caching useful tuples rather than all tuples from the inner side of the
join. This variation in hash probe performance is more significant when
the hash join's hash table no longer fits into the CPU's L3 cache, but the
result cache's hash table does. The apparent "random" access of hash
buckets with each hash probe can cause a poor L3 cache hit ratio for large
hash tables. Smaller hash tables generally perform better.
The hash table used for the cache limits itself to not exceeding work_mem
* hash_mem_multiplier in size. We maintain a dlist of keys for this cache
and when we're adding new tuples and realize we've exceeded the memory
budget, we evict cache entries starting with the least recently used ones
until we have enough memory to add the new tuples to the cache.
For parameterized nested loop joins, we now consider using one of these
result cache nodes in between the nested loop node and its inner node. We
determine when this might be useful based on cost, which is primarily
driven off of what the expected cache hit ratio will be. Estimating the
cache hit ratio relies on having good distinct estimates on the nested
loop's parameters.
For now, the planner will only consider using a result cache for
parameterized nested loop joins. This works for both normal joins and
also for LATERAL type joins to subqueries. It is possible to use this new
node for other uses in the future. For example, to cache results from
correlated subqueries. However, that's not done here due to some
difficulties obtaining a distinct estimation on the outer plan to
calculate the estimated cache hit ratio. Currently we plan the inner plan
before planning the outer plan so there is no good way to know if a result
cache would be useful or not since we can't estimate the number of times
the subplan will be called until the outer plan is generated.
The functionality being added here is newly introducing a dependency on
the return value of estimate_num_groups() during the join search.
Previously, during the join search, we only ever needed to perform
selectivity estimations. With this commit, we need to use
estimate_num_groups() in order to estimate what the hit ratio on the
result cache will be. In simple terms, if we expect 10 distinct values
and we expect 1000 outer rows, then we'll estimate the hit ratio to be
99%. Since cache hits are very cheap compared to scanning the underlying
nodes on the inner side of the nested loop join, then this will
significantly reduce the planner's cost for the join. However, it's
fairly easy to see here that things will go bad when estimate_num_groups()
incorrectly returns a value that's significantly lower than the actual
number of distinct values. If this happens then that may cause us to make
use of a nested loop join with a result cache instead of some other join
type, such as a merge or hash join. Our distinct estimations have been
known to be a source of trouble in the past, so the extra reliance on them
here could cause the planner to choose slower plans than it did previous
to having this feature. Distinct estimations are also fairly hard to
estimate accurately when several tables have been joined already or when a
WHERE clause filters out a set of values that are correlated to the
expressions we're estimating the number of distinct value for.
For now, the costing we perform during query planning for result caches
does put quite a bit of faith in the distinct estimations being accurate.
When these are accurate then we should generally see faster execution
times for plans containing a result cache. However, in the real world, we
may find that we need to either change the costings to put less trust in
the distinct estimations being accurate or perhaps even disable this
feature by default. There's always an element of risk when we teach the
query planner to do new tricks that it decides to use that new trick at
the wrong time and causes a regression. Users may opt to get the old
behavior by turning the feature off using the enable_resultcache GUC.
Currently, this is enabled by default. It remains to be seen if we'll
maintain that setting for the release.
Additionally, the name "Result Cache" is the best name I could think of
for this new node at the time I started writing the patch. Nobody seems
to strongly dislike the name. A few people did suggest other names but no
other name seemed to dominate in the brief discussion that there was about
names. Let's allow the beta period to see if the current name pleases
enough people. If there's some consensus on a better name, then we can
change it before the release. Please see the 2nd discussion link below
for the discussion on the "Result Cache" name.
Author: David Rowley
Reviewed-by: Andy Fan, Justin Pryzby, Zhihong Yu, Hou Zhijie
Tested-By: Konstantin Knizhnik
Discussion: https://postgr.es/m/CAApHDvrPcQyQdWERGYWx8J%2B2DLUNgXu%2BfOSbQ1UscxrunyXyrQ%40mail.gmail.com
Discussion: https://postgr.es/m/CAApHDvq=yQXr5kqhRviT2RhNKwToaWr9JAN5t+5_PzhuRJ3wvg@mail.gmail.com
2021-04-02 03:10:56 +02:00
|
|
|
|
2021-11-08 02:40:33 +01:00
|
|
|
/* hash equality operators used for memoize nodes, else InvalidOid */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Oid left_hasheqoperator pg_node_attr(equal_ignore);
|
|
|
|
Oid right_hasheqoperator pg_node_attr(equal_ignore);
|
1999-08-16 04:17:58 +02:00
|
|
|
} RestrictInfo;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2018-04-20 21:19:16 +02:00
|
|
|
/*
|
|
|
|
* This macro embodies the correct way to test whether a RestrictInfo is
|
|
|
|
* "pushed down" to a given outer join, that is, should be treated as a filter
|
|
|
|
* clause rather than a join clause at that outer join. This is certainly so
|
|
|
|
* if is_pushed_down is true; but examining that is not sufficient anymore,
|
|
|
|
* because outer-join clauses will get pushed down to lower outer joins when
|
|
|
|
* we generate a path for the lower outer join that is parameterized by the
|
|
|
|
* LHS of the upper one. We can detect such a clause by noting that its
|
|
|
|
* required_relids exceed the scope of the join.
|
|
|
|
*/
|
|
|
|
#define RINFO_IS_PUSHED_DOWN(rinfo, joinrelids) \
|
|
|
|
((rinfo)->is_pushed_down || \
|
|
|
|
!bms_is_subset((rinfo)->required_relids, joinrelids))
|
|
|
|
|
2007-01-22 21:00:40 +01:00
|
|
|
/*
|
|
|
|
* Since mergejoinscansel() is a relatively expensive function, and would
|
|
|
|
* otherwise be invoked many times while planning a large join tree,
|
|
|
|
* we go out of our way to cache its results. Each mergejoinable
|
|
|
|
* RestrictInfo carries a list of the specific sort orderings that have
|
|
|
|
* been considered for use with it, and the resulting selectivities.
|
|
|
|
*/
|
|
|
|
typedef struct MergeScanSelCache
|
|
|
|
{
|
|
|
|
/* Ordering details (cache lookup key) */
|
|
|
|
Oid opfamily; /* btree opfamily defining the ordering */
|
2011-03-20 01:29:08 +01:00
|
|
|
Oid collation; /* collation for the ordering */
|
2007-01-22 21:00:40 +01:00
|
|
|
int strategy; /* sort direction (ASC or DESC) */
|
|
|
|
bool nulls_first; /* do NULLs come before normal values? */
|
|
|
|
/* Results */
|
2007-12-08 22:05:11 +01:00
|
|
|
Selectivity leftstartsel; /* first-join fraction for clause left side */
|
|
|
|
Selectivity leftendsel; /* last-join fraction for clause left side */
|
|
|
|
Selectivity rightstartsel; /* first-join fraction for clause right side */
|
|
|
|
Selectivity rightendsel; /* last-join fraction for clause right side */
|
2007-01-22 21:00:40 +01:00
|
|
|
} MergeScanSelCache;
|
|
|
|
|
2008-10-21 22:42:53 +02:00
|
|
|
/*
|
|
|
|
* Placeholder node for an expression to be evaluated below the top level
|
|
|
|
* of a plan tree. This is used during planning to represent the contained
|
|
|
|
* expression. At the end of the planning process it is replaced by either
|
|
|
|
* the contained expression or a Var referring to a lower-level evaluation of
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* the contained expression. Generally the evaluation occurs below an outer
|
2008-10-21 22:42:53 +02:00
|
|
|
* join, and Var references above the outer join might thereby yield NULL
|
|
|
|
* instead of the expression value.
|
|
|
|
*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* phrels and phlevelsup correspond to the varno/varlevelsup fields of a
|
|
|
|
* plain Var, except that phrels has to be a relid set since the evaluation
|
|
|
|
* level of a PlaceHolderVar might be a join rather than a base relation.
|
|
|
|
* Likewise, phnullingrels corresponds to varnullingrels.
|
|
|
|
*
|
2008-10-21 22:42:53 +02:00
|
|
|
* Although the planner treats this as an expression node type, it is not
|
|
|
|
* recognized by the parser or executor, so we declare it here rather than
|
|
|
|
* in primnodes.h.
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
*
|
|
|
|
* We intentionally do not compare phexpr. Two PlaceHolderVars with the
|
|
|
|
* same ID and levelsup should be considered equal even if the contained
|
|
|
|
* expressions have managed to mutate to different states. This will
|
|
|
|
* happen during final plan construction when there are nested PHVs, since
|
|
|
|
* the inner PHV will get replaced by a Param in some copies of the outer
|
|
|
|
* PHV. Another way in which it can happen is that initplan sublinks
|
|
|
|
* could get replaced by differently-numbered Params when sublink folding
|
|
|
|
* is done. (The end result of such a situation would be some
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* unreferenced initplans, which is annoying but not really a problem.)
|
|
|
|
* On the same reasoning, there is no need to examine phrels. But we do
|
|
|
|
* need to compare phnullingrels, as that represents effects that are
|
|
|
|
* external to the original value of the PHV.
|
2008-10-21 22:42:53 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
typedef struct PlaceHolderVar
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_query_jumble)
|
|
|
|
|
2008-10-21 22:42:53 +02:00
|
|
|
Expr xpr;
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/* the represented expression */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Expr *phexpr pg_node_attr(equal_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/* base+OJ relids syntactically within expr src */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
Relids phrels pg_node_attr(equal_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/* RT indexes of outer joins that can null PHV's value */
|
|
|
|
Relids phnullingrels;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* ID for PHV (unique within planner run) */
|
|
|
|
Index phid;
|
|
|
|
|
|
|
|
/* > 0 if PHV belongs to outer query */
|
|
|
|
Index phlevelsup;
|
2008-10-21 22:42:53 +02:00
|
|
|
} PlaceHolderVar;
|
|
|
|
|
2008-08-14 20:48:00 +02:00
|
|
|
/*
|
|
|
|
* "Special join" info.
|
2005-12-20 03:30:36 +01:00
|
|
|
*
|
|
|
|
* One-sided outer joins constrain the order of joining partially but not
|
|
|
|
* completely. We flatten such joins into the planner's top-level list of
|
2008-08-14 20:48:00 +02:00
|
|
|
* relations to join, but record information about each outer join in a
|
|
|
|
* SpecialJoinInfo struct. These structs are kept in the PlannerInfo node's
|
|
|
|
* join_info_list.
|
|
|
|
*
|
|
|
|
* Similarly, semijoins and antijoins created by flattening IN (subselect)
|
|
|
|
* and EXISTS(subselect) clauses create partial constraints on join order.
|
|
|
|
* These are likewise recorded in SpecialJoinInfo structs.
|
|
|
|
*
|
|
|
|
* We make SpecialJoinInfos for FULL JOINs even though there is no flexibility
|
|
|
|
* of planning for them, because this simplifies make_join_rel()'s API.
|
2005-12-20 03:30:36 +01:00
|
|
|
*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* min_lefthand and min_righthand are the sets of base+OJ relids that must be
|
|
|
|
* available on each side when performing the special join.
|
2005-12-20 03:30:36 +01:00
|
|
|
* It is not valid for either min_lefthand or min_righthand to be empty sets;
|
|
|
|
* if they were, this would break the logic that enforces join order.
|
|
|
|
*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* syn_lefthand and syn_righthand are the sets of base+OJ relids that are
|
2008-08-14 20:48:00 +02:00
|
|
|
* syntactically below this special join. (These are needed to help compute
|
|
|
|
* min_lefthand and min_righthand for higher joins.)
|
2007-08-31 03:44:06 +02:00
|
|
|
*
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
* jointype is never JOIN_RIGHT; a RIGHT JOIN is handled by switching
|
2023-04-05 22:59:00 +02:00
|
|
|
* the inputs to make it a LEFT JOIN. It's never JOIN_RIGHT_ANTI either.
|
|
|
|
* So the allowed values of jointype in a join_info_list member are only
|
|
|
|
* LEFT, FULL, SEMI, or ANTI.
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
*
|
|
|
|
* ojrelid is the RT index of the join RTE representing this outer join,
|
|
|
|
* if there is one. It is zero when jointype is INNER or SEMI, and can be
|
|
|
|
* zero for jointype ANTI (if the join was transformed from a SEMI join).
|
|
|
|
* One use for this field is that when constructing the output targetlist of a
|
|
|
|
* join relation that implements this OJ, we add ojrelid to the varnullingrels
|
|
|
|
* and phnullingrels fields of nullable (RHS) output columns, so that the
|
|
|
|
* output Vars and PlaceHolderVars correctly reflect the nulling that has
|
|
|
|
* potentially happened to them.
|
|
|
|
*
|
|
|
|
* commute_above_l is filled with the relids of syntactically-higher outer
|
|
|
|
* joins that have been found to commute with this one per outer join identity
|
|
|
|
* 3 (see optimizer/README), when this join is in the LHS of the upper join
|
|
|
|
* (so, this is the lower join in the first form of the identity).
|
|
|
|
*
|
|
|
|
* commute_above_r is filled with the relids of syntactically-higher outer
|
|
|
|
* joins that have been found to commute with this one per outer join identity
|
|
|
|
* 3, when this join is in the RHS of the upper join (so, this is the lower
|
|
|
|
* join in the second form of the identity).
|
|
|
|
*
|
2023-05-17 17:13:52 +02:00
|
|
|
* commute_below_l is filled with the relids of syntactically-lower outer
|
|
|
|
* joins that have been found to commute with this one per outer join identity
|
|
|
|
* 3 and are in the LHS of this join (so, this is the upper join in the first
|
|
|
|
* form of the identity).
|
|
|
|
*
|
|
|
|
* commute_below_r is filled with the relids of syntactically-lower outer
|
|
|
|
* joins that have been found to commute with this one per outer join identity
|
|
|
|
* 3 and are in the RHS of this join (so, this is the upper join in the second
|
|
|
|
* form of the identity).
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
*
|
|
|
|
* lhs_strict is true if the special join's condition cannot succeed when the
|
|
|
|
* LHS variables are all NULL (this means that an outer join can commute with
|
|
|
|
* upper-level outer joins even if it appears in their RHS). We don't bother
|
|
|
|
* to set lhs_strict for FULL JOINs, however.
|
|
|
|
*
|
Improve planner's cost estimation in the presence of semijoins.
If we have a semijoin, say
SELECT * FROM x WHERE x1 IN (SELECT y1 FROM y)
and we're estimating the cost of a parameterized indexscan on x, the number
of repetitions of the indexscan should not be taken as the size of y; it'll
really only be the number of distinct values of y1, because the only valid
plan with y on the outside of a nestloop would require y to be unique-ified
before joining it to x. Most of the time this doesn't make that much
difference, but sometimes it can lead to drastically underestimating the
cost of the indexscan and hence choosing a bad plan, as pointed out by
David Kubečka.
Fixing this is a bit difficult because parameterized indexscans are costed
out quite early in the planning process, before we have the information
that would be needed to call estimate_num_groups() and thereby estimate the
number of distinct values of the join column(s). However we can move the
code that extracts a semijoin RHS's unique-ification columns, so that it's
done in initsplan.c rather than on-the-fly in create_unique_path(). That
shouldn't make any difference speed-wise and it's really a bit cleaner too.
The other bit of information we need is the size of the semijoin RHS,
which is easy if it's a single relation (we make those estimates before
considering indexscan costs) but problematic if it's a join relation.
The solution adopted here is just to use the product of the sizes of the
join component rels. That will generally be an overestimate, but since
estimate_num_groups() only uses this input as a clamp, an overestimate
shouldn't hurt us too badly. In any case we don't allow this new logic
to produce a value larger than we would have chosen before, so that at
worst an overestimate leaves us no wiser than we were before.
2015-03-12 02:21:00 +01:00
|
|
|
* For a semijoin, we also extract the join operators and their RHS arguments
|
|
|
|
* and set semi_operators, semi_rhs_exprs, semi_can_btree, and semi_can_hash.
|
|
|
|
* This is done in support of possibly unique-ifying the RHS, so we don't
|
|
|
|
* bother unless at least one of semi_can_btree and semi_can_hash can be set
|
|
|
|
* true. (You might expect that this information would be computed during
|
|
|
|
* join planning; but it's helpful to have it available during planning of
|
|
|
|
* parameterized table scans, so we store it in the SpecialJoinInfo structs.)
|
2008-08-14 20:48:00 +02:00
|
|
|
*
|
|
|
|
* For purposes of join selectivity estimation, we create transient
|
|
|
|
* SpecialJoinInfo structures for regular inner joins; so it is possible
|
|
|
|
* to have jointype == JOIN_INNER in such a structure, even though this is
|
2009-02-25 04:30:38 +01:00
|
|
|
* not allowed within join_info_list. We also create transient
|
|
|
|
* SpecialJoinInfos with jointype == JOIN_INNER for outer joins, since for
|
|
|
|
* cost estimation purposes it is sometimes useful to know the join size under
|
Do assorted mop-up in the planner.
Remove RestrictInfo.nullable_relids, along with a good deal of
infrastructure that calculated it. One use-case for it was in
join_clause_is_movable_to, but we can now replace that usage with
a check to see if the clause's relids include any outer join
that can null the target relation. The other use-case was in
join_clause_is_movable_into, but that test can just be dropped
entirely now that the clause's relids include outer joins.
Furthermore, join_clause_is_movable_into should now be
accurate enough that it will accept anything returned by
generate_join_implied_equalities, so we can restore the Assert
that was diked out in commit 95f4e59c3.
Remove the outerjoin_delayed mechanism. We needed this before to
prevent quals from getting evaluated below outer joins that should
null some of their vars. Now that we consider varnullingrels while
placing quals, that's taken care of automatically, so throw the
whole thing away.
Teach remove_useless_result_rtes to also remove useless FromExprs.
Having done that, the delay_upper_joins flag serves no purpose any
more and we can remove it, largely reverting 11086f2f2.
Use constant TRUE for "dummy" clauses when throwing back outer joins.
This improves on a hack I introduced in commit 6a6522529. If we
have a left-join clause l.x = r.y, and a WHERE clause l.x = constant,
we generate r.y = constant and then don't really have a need for the
join clause. But we must throw the join clause back anyway after
marking it redundant, so that the join search heuristics won't think
this is a clauseless join and avoid it. That was a kluge introduced
under time pressure, and after looking at it I thought of a better
way: let's just introduce constant-TRUE "join clauses" instead,
and get rid of them at the end. This improves the generated plans for
such cases by not having to test a redundant join clause. We can also
get rid of the ugly hack used to mark such clauses as redundant for
selectivity estimation.
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:44:36 +01:00
|
|
|
* plain innerjoin semantics. Note that lhs_strict and the semi_xxx fields
|
|
|
|
* are not set meaningfully within such structs.
|
2005-12-20 03:30:36 +01:00
|
|
|
*/
|
2019-01-29 21:48:51 +01:00
|
|
|
#ifndef HAVE_SPECIALJOININFO_TYPEDEF
|
|
|
|
typedef struct SpecialJoinInfo SpecialJoinInfo;
|
|
|
|
#define HAVE_SPECIALJOININFO_TYPEDEF 1
|
|
|
|
#endif
|
2005-12-20 03:30:36 +01:00
|
|
|
|
2019-01-29 21:48:51 +01:00
|
|
|
struct SpecialJoinInfo
|
2005-12-20 03:30:36 +01:00
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2005-12-20 03:30:36 +01:00
|
|
|
NodeTag type;
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
Relids min_lefthand; /* base+OJ relids in minimum LHS for join */
|
|
|
|
Relids min_righthand; /* base+OJ relids in minimum RHS for join */
|
|
|
|
Relids syn_lefthand; /* base+OJ relids syntactically within LHS */
|
|
|
|
Relids syn_righthand; /* base+OJ relids syntactically within RHS */
|
2008-08-14 20:48:00 +02:00
|
|
|
JoinType jointype; /* always INNER, LEFT, FULL, SEMI, or ANTI */
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
Index ojrelid; /* outer join's RT index; 0 if none */
|
|
|
|
Relids commute_above_l; /* commuting OJs above this one, if LHS */
|
|
|
|
Relids commute_above_r; /* commuting OJs above this one, if RHS */
|
2023-05-17 17:13:52 +02:00
|
|
|
Relids commute_below_l; /* commuting OJs in this one's LHS */
|
|
|
|
Relids commute_below_r; /* commuting OJs in this one's RHS */
|
2005-12-20 03:30:36 +01:00
|
|
|
bool lhs_strict; /* joinclause is strict for some LHS rel */
|
Improve planner's cost estimation in the presence of semijoins.
If we have a semijoin, say
SELECT * FROM x WHERE x1 IN (SELECT y1 FROM y)
and we're estimating the cost of a parameterized indexscan on x, the number
of repetitions of the indexscan should not be taken as the size of y; it'll
really only be the number of distinct values of y1, because the only valid
plan with y on the outside of a nestloop would require y to be unique-ified
before joining it to x. Most of the time this doesn't make that much
difference, but sometimes it can lead to drastically underestimating the
cost of the indexscan and hence choosing a bad plan, as pointed out by
David Kubečka.
Fixing this is a bit difficult because parameterized indexscans are costed
out quite early in the planning process, before we have the information
that would be needed to call estimate_num_groups() and thereby estimate the
number of distinct values of the join column(s). However we can move the
code that extracts a semijoin RHS's unique-ification columns, so that it's
done in initsplan.c rather than on-the-fly in create_unique_path(). That
shouldn't make any difference speed-wise and it's really a bit cleaner too.
The other bit of information we need is the size of the semijoin RHS,
which is easy if it's a single relation (we make those estimates before
considering indexscan costs) but problematic if it's a join relation.
The solution adopted here is just to use the product of the sizes of the
join component rels. That will generally be an overestimate, but since
estimate_num_groups() only uses this input as a clamp, an overestimate
shouldn't hurt us too badly. In any case we don't allow this new logic
to produce a value larger than we would have chosen before, so that at
worst an overestimate leaves us no wiser than we were before.
2015-03-12 02:21:00 +01:00
|
|
|
/* Remaining fields are set only for JOIN_SEMI jointype: */
|
|
|
|
bool semi_can_btree; /* true if semi_operators are all btree */
|
|
|
|
bool semi_can_hash; /* true if semi_operators are all hash */
|
|
|
|
List *semi_operators; /* OIDs of equality join operators */
|
|
|
|
List *semi_rhs_exprs; /* righthand-side expressions of these ops */
|
2019-01-29 21:48:51 +01:00
|
|
|
};
|
2003-01-20 19:55:07 +01:00
|
|
|
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
/*
|
|
|
|
* Transient outer-join clause info.
|
|
|
|
*
|
|
|
|
* We set aside every outer join ON clause that looks mergejoinable,
|
|
|
|
* and process it specially at the end of qual distribution.
|
|
|
|
*/
|
|
|
|
typedef struct OuterJoinClauseInfo
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
2023-01-30 19:16:20 +01:00
|
|
|
|
|
|
|
NodeTag type;
|
|
|
|
RestrictInfo *rinfo; /* a mergejoinable outer-join clause */
|
|
|
|
SpecialJoinInfo *sjinfo; /* the outer join's SpecialJoinInfo */
|
|
|
|
} OuterJoinClauseInfo;
|
|
|
|
|
2006-01-31 22:39:25 +01:00
|
|
|
/*
|
|
|
|
* Append-relation info.
|
|
|
|
*
|
|
|
|
* When we expand an inheritable table or a UNION-ALL subselect into an
|
|
|
|
* "append relation" (essentially, a list of child RTEs), we build an
|
2017-09-14 21:41:08 +02:00
|
|
|
* AppendRelInfo for each child RTE. The list of AppendRelInfos indicates
|
|
|
|
* which child RTEs must be included when expanding the parent, and each node
|
2019-12-03 00:05:29 +01:00
|
|
|
* carries information needed to translate between columns of the parent and
|
|
|
|
* columns of the child.
|
|
|
|
*
|
|
|
|
* These structs are kept in the PlannerInfo node's append_rel_list, with
|
|
|
|
* append_rel_array[] providing a convenient lookup method for the struct
|
|
|
|
* associated with a particular child relid (there can be only one, though
|
|
|
|
* parent rels may have many entries in append_rel_list).
|
2006-01-31 22:39:25 +01:00
|
|
|
*
|
|
|
|
* Note: after completion of the planner prep phase, any given RTE is an
|
|
|
|
* append parent having entries in append_rel_list if and only if its
|
|
|
|
* "inh" flag is set. We clear "inh" for plain tables that turn out not
|
|
|
|
* to have inheritance children, and (in an abuse of the original meaning
|
|
|
|
* of the flag) we set "inh" for subquery RTEs that turn out to be
|
|
|
|
* flattenable UNION ALL queries. This lets us avoid useless searches
|
|
|
|
* of append_rel_list.
|
|
|
|
*
|
|
|
|
* Note: the data structure assumes that append-rel members are single
|
|
|
|
* baserels. This is OK for inheritance, but it prevents us from pulling
|
|
|
|
* up a UNION ALL member subquery if it contains a join. While that could
|
|
|
|
* be fixed with a more complex data structure, at present there's not much
|
|
|
|
* point because no improvement in the plan could result.
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef struct AppendRelInfo
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_query_jumble)
|
|
|
|
|
2006-01-31 22:39:25 +01:00
|
|
|
NodeTag type;
|
2006-10-04 02:30:14 +02:00
|
|
|
|
2006-01-31 22:39:25 +01:00
|
|
|
/*
|
|
|
|
* These fields uniquely identify this append relationship. There can be
|
|
|
|
* (in fact, always should be) multiple AppendRelInfos for the same
|
|
|
|
* parent_relid, but never more than one per child_relid, since a given
|
|
|
|
* RTE cannot be a child of more than one append parent.
|
|
|
|
*/
|
|
|
|
Index parent_relid; /* RT index of append parent rel */
|
|
|
|
Index child_relid; /* RT index of append child rel */
|
2006-10-04 02:30:14 +02:00
|
|
|
|
2006-01-31 22:39:25 +01:00
|
|
|
/*
|
|
|
|
* For an inheritance appendrel, the parent and child are both regular
|
|
|
|
* relations, and we store their rowtype OIDs here for use in translating
|
|
|
|
* whole-row Vars. For a UNION-ALL appendrel, the parent and child are
|
|
|
|
* both subqueries with no named rowtype, and we store InvalidOid here.
|
|
|
|
*/
|
|
|
|
Oid parent_reltype; /* OID of parent's composite type */
|
|
|
|
Oid child_reltype; /* OID of child's composite type */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The N'th element of this list is a Var or expression representing the
|
|
|
|
* child column corresponding to the N'th column of the parent. This is
|
|
|
|
* used to translate Vars referencing the parent rel into references to
|
|
|
|
* the child. A list element is NULL if it corresponds to a dropped
|
|
|
|
* column of the parent (this is only possible for inheritance cases, not
|
2008-11-11 19:13:32 +01:00
|
|
|
* UNION ALL). The list elements are always simple Vars for inheritance
|
|
|
|
* cases, but can be arbitrary expressions in UNION ALL cases.
|
2006-01-31 22:39:25 +01:00
|
|
|
*
|
|
|
|
* Notice we only store entries for user columns (attno > 0). Whole-row
|
|
|
|
* Vars are special-cased, and system columns (attno < 0) need no special
|
|
|
|
* translation since their attnos are the same for all tables.
|
|
|
|
*
|
|
|
|
* Caution: the Vars have varlevelsup = 0. Be careful to adjust as needed
|
|
|
|
* when copying into a subquery.
|
|
|
|
*/
|
|
|
|
List *translated_vars; /* Expressions in the child's Vars */
|
2006-10-04 02:30:14 +02:00
|
|
|
|
2019-12-03 00:05:29 +01:00
|
|
|
/*
|
|
|
|
* This array simplifies translations in the reverse direction, from
|
|
|
|
* child's column numbers to parent's. The entry at [ccolno - 1] is the
|
|
|
|
* 1-based parent column number for child column ccolno, or zero if that
|
|
|
|
* child column is dropped or doesn't exist in the parent.
|
|
|
|
*/
|
|
|
|
int num_child_cols; /* length of array */
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
AttrNumber *parent_colnos pg_node_attr(array_size(num_child_cols));
|
2019-12-03 00:05:29 +01:00
|
|
|
|
2006-01-31 22:39:25 +01:00
|
|
|
/*
|
|
|
|
* We store the parent table's OID here for inheritance, or InvalidOid for
|
|
|
|
* UNION ALL. This is only needed to help in generating error messages if
|
|
|
|
* an attempt is made to reference a dropped parent column.
|
|
|
|
*/
|
|
|
|
Oid parent_reloid; /* OID of parent relation */
|
|
|
|
} AppendRelInfo;
|
|
|
|
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
/*
|
2022-10-24 12:52:43 +02:00
|
|
|
* Information about a row-identity "resjunk" column in UPDATE/DELETE/MERGE.
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
*
|
2022-10-24 12:52:43 +02:00
|
|
|
* In partitioned UPDATE/DELETE/MERGE it's important for child partitions to
|
|
|
|
* share row-identity columns whenever possible, so as not to chew up too many
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
* targetlist columns. We use these structs to track which identity columns
|
|
|
|
* have been requested. In the finished plan, each of these will give rise
|
|
|
|
* to one resjunk entry in the targetlist of the ModifyTable's subplan node.
|
|
|
|
*
|
|
|
|
* All the Vars stored in RowIdentityVarInfos must have varno ROWID_VAR, for
|
|
|
|
* convenience of detecting duplicate requests. We'll replace that, in the
|
|
|
|
* final plan, with the varno of the generating rel.
|
|
|
|
*
|
|
|
|
* Outside this list, a Var with varno ROWID_VAR and varattno k is a reference
|
|
|
|
* to the k-th element of the row_identity_vars list (k counting from 1).
|
|
|
|
* We add such a reference to root->processed_tlist when creating the entry,
|
|
|
|
* and it propagates into the plan tree from there.
|
|
|
|
*/
|
|
|
|
typedef struct RowIdentityVarInfo
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
Rework planning and execution of UPDATE and DELETE.
This patch makes two closely related sets of changes:
1. For UPDATE, the subplan of the ModifyTable node now only delivers
the new values of the changed columns (i.e., the expressions computed
in the query's SET clause) plus row identity information such as CTID.
ModifyTable must re-fetch the original tuple to merge in the old
values of any unchanged columns. The core advantage of this is that
the changed columns are uniform across all tables of an inherited or
partitioned target relation, whereas the other columns might not be.
A secondary advantage, when the UPDATE involves joins, is that less
data needs to pass through the plan tree. The disadvantage of course
is an extra fetch of each tuple to be updated. However, that seems to
be very nearly free in context; even worst-case tests don't show it to
add more than a couple percent to the total query cost. At some point
it might be interesting to combine the re-fetch with the tuple access
that ModifyTable must do anyway to mark the old tuple dead; but that
would require a good deal of refactoring and it seems it wouldn't buy
all that much, so this patch doesn't attempt it.
2. For inherited UPDATE/DELETE, instead of generating a separate
subplan for each target relation, we now generate a single subplan
that is just exactly like a SELECT's plan, then stick ModifyTable
on top of that. To let ModifyTable know which target relation a
given incoming row refers to, a tableoid junk column is added to
the row identity information. This gets rid of the horrid hack
that was inheritance_planner(), eliminating O(N^2) planning cost
and memory consumption in cases where there were many unprunable
target relations.
Point 2 of course requires point 1, so that there is a uniform
definition of the non-junk columns to be returned by the subplan.
We can't insist on uniform definition of the row identity junk
columns however, if we want to keep the ability to have both
plain and foreign tables in a partitioning hierarchy. Since
it wouldn't scale very far to have every child table have its
own row identity column, this patch includes provisions to merge
similar row identity columns into one column of the subplan result.
In particular, we can merge the whole-row Vars typically used as
row identity by FDWs into one column by pretending they are type
RECORD. (It's still okay for the actual composite Datums to be
labeled with the table's rowtype OID, though.)
There is more that can be done to file down residual inefficiencies
in this patch, but it seems to be committable now.
FDW authors should note several API changes:
* The argument list for AddForeignUpdateTargets() has changed, and so
has the method it must use for adding junk columns to the query. Call
add_row_identity_var() instead of manipulating the parse tree directly.
You might want to reconsider exactly what you're adding, too.
* PlanDirectModify() must now work a little harder to find the
ForeignScan plan node; if the foreign table is part of a partitioning
hierarchy then the ForeignScan might not be the direct child of
ModifyTable. See postgres_fdw for sample code.
* To check whether a relation is a target relation, it's no
longer sufficient to compare its relid to root->parse->resultRelation.
Instead, check it against all_result_relids or leaf_result_relids,
as appropriate.
Amit Langote and Tom Lane
Discussion: https://postgr.es/m/CA+HiwqHpHdqdDn48yCEhynnniahH78rwcrv1rEX65-fsZGBOLQ@mail.gmail.com
2021-03-31 17:52:34 +02:00
|
|
|
NodeTag type;
|
|
|
|
|
|
|
|
Var *rowidvar; /* Var to be evaluated (but varno=ROWID_VAR) */
|
|
|
|
int32 rowidwidth; /* estimated average width */
|
|
|
|
char *rowidname; /* name of the resjunk column */
|
|
|
|
Relids rowidrels; /* RTE indexes of target rels using this */
|
|
|
|
} RowIdentityVarInfo;
|
|
|
|
|
2008-10-21 22:42:53 +02:00
|
|
|
/*
|
|
|
|
* For each distinct placeholder expression generated during planning, we
|
|
|
|
* store a PlaceHolderInfo node in the PlannerInfo node's placeholder_list.
|
|
|
|
* This stores info that is needed centrally rather than in each copy of the
|
|
|
|
* PlaceHolderVar. The phid fields identify which PlaceHolderInfo goes with
|
|
|
|
* each PlaceHolderVar. Note that phid is unique throughout a planner run,
|
|
|
|
* not just within a query level --- this is so that we need not reassign ID's
|
|
|
|
* when pulling a subquery into its parent.
|
|
|
|
*
|
|
|
|
* The idea is to evaluate the expression at (only) the ph_eval_at join level,
|
|
|
|
* then allow it to bubble up like a Var until the ph_needed join level.
|
|
|
|
* ph_needed has the same definition as attr_needed for a regular Var.
|
2008-10-22 22:17:52 +02:00
|
|
|
*
|
2013-08-18 02:22:37 +02:00
|
|
|
* The PlaceHolderVar's expression might contain LATERAL references to vars
|
|
|
|
* coming from outside its syntactic scope. If so, those rels are *not*
|
|
|
|
* included in ph_eval_at, but they are recorded in ph_lateral.
|
|
|
|
*
|
2013-08-15 00:38:32 +02:00
|
|
|
* Notice that when ph_eval_at is a join rather than a single baserel, the
|
|
|
|
* PlaceHolderInfo may create constraints on join order: the ph_eval_at join
|
|
|
|
* has to be formed below any outer joins that should null the PlaceHolderVar.
|
2010-09-28 18:08:56 +02:00
|
|
|
*
|
2008-10-22 22:17:52 +02:00
|
|
|
* We create a PlaceHolderInfo only after determining that the PlaceHolderVar
|
2010-09-28 18:08:56 +02:00
|
|
|
* is actually referenced in the plan tree, so that unreferenced placeholders
|
|
|
|
* don't result in unnecessary constraints on join order.
|
2008-10-21 22:42:53 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
typedef struct PlaceHolderInfo
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2008-10-21 22:42:53 +02:00
|
|
|
NodeTag type;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* ID for PH (unique within planner run) */
|
|
|
|
Index phid;
|
|
|
|
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
/*
|
|
|
|
* copy of PlaceHolderVar tree (should be redundant for comparison, could
|
|
|
|
* be ignored)
|
|
|
|
*/
|
2022-07-02 12:33:07 +02:00
|
|
|
PlaceHolderVar *ph_var;
|
|
|
|
|
|
|
|
/* lowest level we can evaluate value at */
|
|
|
|
Relids ph_eval_at;
|
|
|
|
|
|
|
|
/* relids of contained lateral refs, if any */
|
|
|
|
Relids ph_lateral;
|
|
|
|
|
|
|
|
/* highest level the value is needed at */
|
|
|
|
Relids ph_needed;
|
|
|
|
|
|
|
|
/* estimated attribute width */
|
|
|
|
int32 ph_width;
|
2008-10-21 22:42:53 +02:00
|
|
|
} PlaceHolderInfo;
|
|
|
|
|
2010-11-04 17:01:17 +01:00
|
|
|
/*
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
* This struct describes one potentially index-optimizable MIN/MAX aggregate
|
|
|
|
* function. MinMaxAggPath contains a list of these, and if we accept that
|
|
|
|
* path, the list is stored into root->minmax_aggs for use during setrefs.c.
|
2010-11-04 17:01:17 +01:00
|
|
|
*/
|
|
|
|
typedef struct MinMaxAggInfo
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2010-11-04 17:01:17 +01:00
|
|
|
NodeTag type;
|
|
|
|
|
2022-07-02 12:33:07 +02:00
|
|
|
/* pg_proc Oid of the aggregate */
|
|
|
|
Oid aggfnoid;
|
|
|
|
|
|
|
|
/* Oid of its sort operator */
|
|
|
|
Oid aggsortop;
|
|
|
|
|
|
|
|
/* expression we are aggregating on */
|
|
|
|
Expr *target;
|
|
|
|
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
/*
|
|
|
|
* modified "root" for planning the subquery; not printed, too large, not
|
|
|
|
* interesting enough
|
|
|
|
*/
|
|
|
|
PlannerInfo *subroot pg_node_attr(read_write_ignore);
|
2022-07-02 12:33:07 +02:00
|
|
|
|
|
|
|
/* access path for subquery */
|
|
|
|
Path *path;
|
|
|
|
|
|
|
|
/* estimated cost to fetch first row */
|
|
|
|
Cost pathcost;
|
|
|
|
|
|
|
|
/* param for subplan's output */
|
|
|
|
Param *param;
|
2010-11-04 17:01:17 +01:00
|
|
|
} MinMaxAggInfo;
|
|
|
|
|
2007-02-19 08:03:34 +01:00
|
|
|
/*
|
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.
The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries. This was (probably) safe before the introduction of
CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE. In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.
To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter. The planning code is
simpler and probably faster than before, as well as being more correct.
Per report from Vik Reykja.
These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
|
|
|
* At runtime, PARAM_EXEC slots are used to pass values around from one plan
|
|
|
|
* node to another. They can be used to pass values down into subqueries (for
|
|
|
|
* outer references in subqueries), or up out of subqueries (for the results
|
|
|
|
* of a subplan), or from a NestLoop plan node into its inner relation (when
|
|
|
|
* the inner scan is parameterized with values from the outer relation).
|
|
|
|
* The planner is responsible for assigning nonconflicting PARAM_EXEC IDs to
|
|
|
|
* the PARAM_EXEC Params it generates.
|
|
|
|
*
|
|
|
|
* Outer references are managed via root->plan_params, which is a list of
|
|
|
|
* PlannerParamItems. While planning a subquery, each parent query level's
|
|
|
|
* plan_params contains the values required from it by the current subquery.
|
|
|
|
* During create_plan(), we use plan_params to track values that must be
|
|
|
|
* passed from outer to inner sides of NestLoop plan nodes.
|
|
|
|
*
|
|
|
|
* The item a PlannerParamItem represents can be one of three kinds:
|
|
|
|
*
|
|
|
|
* A Var: the slot represents a variable of this level that must be passed
|
2010-07-12 19:01:06 +02:00
|
|
|
* down because subqueries have outer references to it, or must be passed
|
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.
The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries. This was (probably) safe before the introduction of
CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE. In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.
To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter. The planning code is
simpler and probably faster than before, as well as being more correct.
Per report from Vik Reykja.
These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
|
|
|
* from a NestLoop node to its inner scan. The varlevelsup value in the Var
|
|
|
|
* will always be zero.
|
2007-02-19 08:03:34 +01:00
|
|
|
*
|
2012-03-24 21:21:39 +01:00
|
|
|
* A PlaceHolderVar: this works much like the Var case, except that the
|
|
|
|
* entry is a PlaceHolderVar node with a contained expression. The PHV
|
|
|
|
* will have phlevelsup = 0, and the contained expression is adjusted
|
|
|
|
* to match in level.
|
2011-11-03 05:50:58 +01:00
|
|
|
*
|
2007-02-19 08:03:34 +01:00
|
|
|
* An Aggref (with an expression tree representing its argument): the slot
|
|
|
|
* represents an aggregate expression that is an outer reference for some
|
|
|
|
* subquery. The Aggref itself has agglevelsup = 0, and its argument tree
|
|
|
|
* is adjusted to match in level.
|
|
|
|
*
|
2011-11-03 05:50:58 +01:00
|
|
|
* Note: we detect duplicate Var and PlaceHolderVar parameters and coalesce
|
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.
The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries. This was (probably) safe before the introduction of
CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE. In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.
To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter. The planning code is
simpler and probably faster than before, as well as being more correct.
Per report from Vik Reykja.
These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
|
|
|
* them into one slot, but we do not bother to do that for Aggrefs.
|
|
|
|
* The scope of duplicate-elimination only extends across the set of
|
|
|
|
* parameters passed from one query level into a single subquery, or for
|
|
|
|
* nestloop parameters across the set of nestloop parameters used in a single
|
|
|
|
* query level. So there is no possibility of a PARAM_EXEC slot being used
|
|
|
|
* for conflicting purposes.
|
|
|
|
*
|
|
|
|
* In addition, PARAM_EXEC slots are assigned for Params representing outputs
|
|
|
|
* from subplans (values that are setParam items for those subplans). These
|
|
|
|
* IDs need not be tracked via PlannerParamItems, since we do not need any
|
|
|
|
* duplicate-elimination nor later processing of the represented expressions.
|
2017-11-13 21:24:12 +01:00
|
|
|
* Instead, we just record the assignment of the slot number by appending to
|
|
|
|
* root->glob->paramExecTypes.
|
2007-02-19 08:03:34 +01:00
|
|
|
*/
|
|
|
|
typedef struct PlannerParamItem
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
Automatically generate node support functions
Add a script to automatically generate the node support functions
(copy, equal, out, and read, as well as the node tags enum) from the
struct definitions.
For each of the four node support files, it creates two include files,
e.g., copyfuncs.funcs.c and copyfuncs.switch.c, to include in the main
file. All the scaffolding of the main file stays in place.
I have tried to mostly make the coverage of the output match what is
currently there. For example, one could now do out/read coverage of
utility statement nodes, but I have manually excluded those for now.
The reason is mainly that it's easier to diff the before and after,
and adding a bunch of stuff like this might require a separate
analysis and review.
Subtyping (TidScan -> Scan) is supported.
For the hard cases, you can just write a manual function and exclude
generating one. For the not so hard cases, there is a way of
annotating struct fields to get special behaviors. For example,
pg_node_attr(equal_ignore) has the field ignored in equal functions.
(In this patch, I have only ifdef'ed out the code to could be removed,
mainly so that it won't constantly have merge conflicts. It will be
deleted in a separate patch. All the code comments that are worth
keeping from those sections have already been moved to the header
files where the structs are defined.)
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://www.postgresql.org/message-id/flat/c1097590-a6a4-486a-64b1-e1f9cc0533ce%40enterprisedb.com
2022-07-09 08:52:19 +02:00
|
|
|
|
2007-02-19 08:03:34 +01:00
|
|
|
NodeTag type;
|
|
|
|
|
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.
The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries. This was (probably) safe before the introduction of
CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE. In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.
To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter. The planning code is
simpler and probably faster than before, as well as being more correct.
Per report from Vik Reykja.
These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
|
|
|
Node *item; /* the Var, PlaceHolderVar, or Aggref */
|
|
|
|
int paramId; /* its assigned PARAM_EXEC slot number */
|
2007-02-19 08:03:34 +01:00
|
|
|
} PlannerParamItem;
|
|
|
|
|
2012-01-28 01:26:38 +01:00
|
|
|
/*
|
2017-04-08 04:20:03 +02:00
|
|
|
* When making cost estimates for a SEMI/ANTI/inner_unique join, there are
|
|
|
|
* some correction factors that are needed in both nestloop and hash joins
|
2012-01-28 01:26:38 +01:00
|
|
|
* to account for the fact that the executor can stop scanning inner rows
|
|
|
|
* as soon as it finds a match to the current outer row. These numbers
|
|
|
|
* depend only on the selected outer and inner join relations, not on the
|
|
|
|
* particular paths used for them, so it's worthwhile to calculate them
|
|
|
|
* just once per relation pair not once per considered path. This struct
|
|
|
|
* is filled by compute_semi_anti_join_factors and must be passed along
|
|
|
|
* to the join cost estimation functions.
|
|
|
|
*
|
|
|
|
* outer_match_frac is the fraction of the outer tuples that are
|
|
|
|
* expected to have at least one match.
|
|
|
|
* match_count is the average number of matches expected for
|
|
|
|
* outer tuples that have at least one match.
|
|
|
|
*/
|
|
|
|
typedef struct SemiAntiJoinFactors
|
|
|
|
{
|
|
|
|
Selectivity outer_match_frac;
|
|
|
|
Selectivity match_count;
|
|
|
|
} SemiAntiJoinFactors;
|
|
|
|
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
/*
|
|
|
|
* Struct for extra information passed to subroutines of add_paths_to_joinrel
|
|
|
|
*
|
|
|
|
* restrictlist contains all of the RestrictInfo nodes for restriction
|
|
|
|
* clauses that apply to this join
|
|
|
|
* mergeclause_list is a list of RestrictInfo nodes for available
|
|
|
|
* mergejoin clauses in this join
|
2017-04-08 04:20:03 +02:00
|
|
|
* inner_unique is true if each outer tuple provably matches no more
|
|
|
|
* than one inner tuple
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
* sjinfo is extra info about special joins for selectivity estimation
|
2017-04-08 04:20:03 +02:00
|
|
|
* semifactors is as shown above (only valid for SEMI/ANTI/inner_unique joins)
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
* param_source_rels are OK targets for parameterization of result paths
|
|
|
|
*/
|
|
|
|
typedef struct JoinPathExtraData
|
|
|
|
{
|
|
|
|
List *restrictlist;
|
|
|
|
List *mergeclause_list;
|
2017-04-08 04:20:03 +02:00
|
|
|
bool inner_unique;
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
SpecialJoinInfo *sjinfo;
|
|
|
|
SemiAntiJoinFactors semifactors;
|
|
|
|
Relids param_source_rels;
|
|
|
|
} JoinPathExtraData;
|
|
|
|
|
Implement partition-wise grouping/aggregation.
If the partition keys of input relation are part of the GROUP BY
clause, all the rows belonging to a given group come from a single
partition. This allows aggregation/grouping over a partitioned
relation to be broken down * into aggregation/grouping on each
partition. This should be no worse, and often better, than the normal
approach.
If the GROUP BY clause does not contain all the partition keys, we can
still perform partial aggregation for each partition and then finalize
aggregation after appending the partial results. This is less certain
to be a win, but it's still useful.
Jeevan Chalke, Ashutosh Bapat, Robert Haas. The larger patch series
of which this patch is a part was also reviewed and tested by Antonin
Houska, Rajkumar Raghuwanshi, David Rowley, Dilip Kumar, Konstantin
Knizhnik, Pascal Legrand, and Rafia Sabih.
Discussion: http://postgr.es/m/CAM2+6=V64_xhstVHie0Rz=KPEQnLJMZt_e314P0jaT_oJ9MR8A@mail.gmail.com
2018-03-22 17:49:48 +01:00
|
|
|
/*
|
|
|
|
* Various flags indicating what kinds of grouping are possible.
|
|
|
|
*
|
|
|
|
* GROUPING_CAN_USE_SORT should be set if it's possible to perform
|
|
|
|
* sort-based implementations of grouping. When grouping sets are in use,
|
|
|
|
* this will be true if sorting is potentially usable for any of the grouping
|
|
|
|
* sets, even if it's not usable for all of them.
|
|
|
|
*
|
|
|
|
* GROUPING_CAN_USE_HASH should be set if it's possible to perform
|
|
|
|
* hash-based implementations of grouping.
|
|
|
|
*
|
|
|
|
* GROUPING_CAN_PARTIAL_AGG should be set if the aggregation is of a type
|
|
|
|
* for which we support partial aggregation (not, for example, grouping sets).
|
|
|
|
* It says nothing about parallel-safety or the availability of suitable paths.
|
|
|
|
*/
|
|
|
|
#define GROUPING_CAN_USE_SORT 0x0001
|
|
|
|
#define GROUPING_CAN_USE_HASH 0x0002
|
|
|
|
#define GROUPING_CAN_PARTIAL_AGG 0x0004
|
|
|
|
|
|
|
|
/*
|
|
|
|
* What kind of partitionwise aggregation is in use?
|
|
|
|
*
|
|
|
|
* PARTITIONWISE_AGGREGATE_NONE: Not used.
|
|
|
|
*
|
|
|
|
* PARTITIONWISE_AGGREGATE_FULL: Aggregate each partition separately, and
|
|
|
|
* append the results.
|
|
|
|
*
|
|
|
|
* PARTITIONWISE_AGGREGATE_PARTIAL: Partially aggregate each partition
|
|
|
|
* separately, append the results, and then finalize aggregation.
|
|
|
|
*/
|
|
|
|
typedef enum
|
|
|
|
{
|
|
|
|
PARTITIONWISE_AGGREGATE_NONE,
|
|
|
|
PARTITIONWISE_AGGREGATE_FULL,
|
|
|
|
PARTITIONWISE_AGGREGATE_PARTIAL
|
|
|
|
} PartitionwiseAggregateType;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Struct for extra information passed to subroutines of create_grouping_paths
|
|
|
|
*
|
|
|
|
* flags indicating what kinds of grouping are possible.
|
|
|
|
* partial_costs_set is true if the agg_partial_costs and agg_final_costs
|
|
|
|
* have been initialized.
|
|
|
|
* agg_partial_costs gives partial aggregation costs.
|
|
|
|
* agg_final_costs gives finalization costs.
|
|
|
|
* target_parallel_safe is true if target is parallel safe.
|
|
|
|
* havingQual gives list of quals to be applied after aggregation.
|
|
|
|
* targetList gives list of columns to be projected.
|
|
|
|
* patype is the type of partitionwise aggregation that is being performed.
|
|
|
|
*/
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
/* Data which remains constant once set. */
|
|
|
|
int flags;
|
|
|
|
bool partial_costs_set;
|
|
|
|
AggClauseCosts agg_partial_costs;
|
|
|
|
AggClauseCosts agg_final_costs;
|
|
|
|
|
|
|
|
/* Data which may differ across partitions. */
|
|
|
|
bool target_parallel_safe;
|
|
|
|
Node *havingQual;
|
|
|
|
List *targetList;
|
|
|
|
PartitionwiseAggregateType patype;
|
|
|
|
} GroupPathExtraData;
|
|
|
|
|
2019-04-02 13:30:45 +02:00
|
|
|
/*
|
|
|
|
* Struct for extra information passed to subroutines of grouping_planner
|
|
|
|
*
|
2019-05-08 09:49:09 +02:00
|
|
|
* limit_needed is true if we actually need a Limit plan node.
|
2019-04-02 13:30:45 +02:00
|
|
|
* limit_tuples is an estimated bound on the number of output tuples,
|
2019-05-08 09:49:09 +02:00
|
|
|
* or -1 if no LIMIT or couldn't estimate.
|
2019-04-02 13:30:45 +02:00
|
|
|
* count_est and offset_est are the estimated values of the LIMIT and OFFSET
|
|
|
|
* expressions computed by preprocess_limit() (see comments for
|
|
|
|
* preprocess_limit() for more information).
|
|
|
|
*/
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
bool limit_needed;
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality limit_tuples;
|
2019-04-02 13:30:45 +02:00
|
|
|
int64 count_est;
|
|
|
|
int64 offset_est;
|
|
|
|
} FinalPathExtraData;
|
|
|
|
|
2012-01-28 01:26:38 +01:00
|
|
|
/*
|
|
|
|
* For speed reasons, cost estimation for join paths is performed in two
|
|
|
|
* phases: the first phase tries to quickly derive a lower bound for the
|
|
|
|
* join cost, and then we check if that's sufficient to reject the path.
|
|
|
|
* If not, we come back for a more refined cost estimate. The first phase
|
|
|
|
* fills a JoinCostWorkspace struct with its preliminary cost estimates
|
|
|
|
* and possibly additional intermediate values. The second phase takes
|
|
|
|
* these values as inputs to avoid repeating work.
|
|
|
|
*
|
|
|
|
* (Ideally we'd declare this in cost.h, but it's also needed in pathnode.h,
|
|
|
|
* so seems best to put it here.)
|
|
|
|
*/
|
|
|
|
typedef struct JoinCostWorkspace
|
|
|
|
{
|
|
|
|
/* Preliminary cost estimates --- must not be larger than final ones! */
|
|
|
|
Cost startup_cost; /* cost expended before fetching any tuples */
|
|
|
|
Cost total_cost; /* total cost (assuming all tuples fetched) */
|
|
|
|
|
|
|
|
/* Fields below here should be treated as private to costsize.c */
|
|
|
|
Cost run_cost; /* non-startup cost components */
|
|
|
|
|
|
|
|
/* private for cost_nestloop code */
|
Fix planner's cost estimation for SEMI/ANTI joins with inner indexscans.
When the inner side of a nestloop SEMI or ANTI join is an indexscan that
uses all the join clauses as indexquals, it can be presumed that both
matched and unmatched outer rows will be processed very quickly: for
matched rows, we'll stop after fetching one row from the indexscan, while
for unmatched rows we'll have an indexscan that finds no matching index
entries, which should also be quick. The planner already knew about this,
but it was nonetheless charging for at least one full run of the inner
indexscan, as a consequence of concerns about the behavior of materialized
inner scans --- but those concerns don't apply in the fast case. If the
inner side has low cardinality (many matching rows) this could make an
indexscan plan look far more expensive than it actually is. To fix,
rearrange the work in initial_cost_nestloop/final_cost_nestloop so that we
don't add the inner scan cost until we've inspected the indexquals, and
then we can add either the full-run cost or just the first tuple's cost as
appropriate.
Experimentation with this fix uncovered another problem: add_path and
friends were coded to disregard cheap startup cost when considering
parameterized paths. That's usually okay (and desirable, because it thins
the path herd faster); but in this fast case for SEMI/ANTI joins, it could
result in throwing away the desired plain indexscan path in favor of a
bitmap scan path before we ever get to the join costing logic. In the
many-matching-rows cases of interest here, a bitmap scan will do a lot more
work than required, so this is a problem. To fix, add a per-relation flag
consider_param_startup that works like the existing consider_startup flag,
but applies to parameterized paths, and set it for relations that are the
inside of a SEMI or ANTI join.
To make this patch reasonably safe to back-patch, care has been taken to
avoid changing the planner's behavior except in the very narrow case of
SEMI/ANTI joins with inner indexscans. There are places in
compare_path_costs_fuzzily and add_path_precheck that are not terribly
consistent with the new approach, but changing them will affect planner
decisions at the margins in other cases, so we'll leave that for a
HEAD-only fix.
Back-patch to 9.3; before that, the consider_startup flag didn't exist,
meaning that the second aspect of the patch would be too invasive.
Per a complaint from Peter Holzer and analysis by Tomas Vondra.
2015-06-03 17:58:47 +02:00
|
|
|
Cost inner_run_cost; /* also used by cost_mergejoin code */
|
2012-01-28 01:26:38 +01:00
|
|
|
Cost inner_rescan_run_cost;
|
|
|
|
|
|
|
|
/* private for cost_mergejoin code */
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality outer_rows;
|
|
|
|
Cardinality inner_rows;
|
|
|
|
Cardinality outer_skip_rows;
|
|
|
|
Cardinality inner_skip_rows;
|
2012-01-28 01:26:38 +01:00
|
|
|
|
|
|
|
/* private for cost_hashjoin code */
|
|
|
|
int numbuckets;
|
|
|
|
int numbatches;
|
2021-09-15 18:56:13 +02:00
|
|
|
Cardinality inner_rows_total;
|
2012-01-28 01:26:38 +01:00
|
|
|
} JoinCostWorkspace;
|
|
|
|
|
2020-11-24 09:45:00 +01:00
|
|
|
/*
|
|
|
|
* AggInfo holds information about an aggregate that needs to be computed.
|
|
|
|
* Multiple Aggrefs in a query can refer to the same AggInfo by having the
|
|
|
|
* same 'aggno' value, so that the aggregate is computed only once.
|
|
|
|
*/
|
|
|
|
typedef struct AggInfo
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
2022-07-19 18:29:37 +02:00
|
|
|
|
|
|
|
NodeTag type;
|
|
|
|
|
2020-11-24 09:45:00 +01:00
|
|
|
/*
|
Improve performance of ORDER BY / DISTINCT aggregates
ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been
executed by always performing a sort in nodeAgg.c to sort the tuples in
the current group into the correct order before calling the transition
function on the sorted tuples. This was not great as often there might be
an index that could have provided pre-sorted input and allowed the
transition functions to be called as the rows come in, rather than having
to store them in a tuplestore in order to sort them once all the tuples
for the group have arrived.
Here we change the planner so it requests a path with a sort order which
supports the most amount of ORDER BY / DISTINCT aggregate functions and
add new code to the executor to allow it to support the processing of
ORDER BY / DISTINCT aggregates where the tuples are already sorted in the
correct order.
Since there can be many ORDER BY / DISTINCT aggregates in any given query
level, it's very possible that we can't find an order that suits all of
these aggregates. The sort order that the planner chooses is simply the
one that suits the most aggregate functions. We take the most strictly
sorted variation of each order and see how many aggregate functions can
use that, then we try again with the order of the remaining aggregates to
see if another order would suit more aggregate functions. For example:
SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ...
would request the sort order to be {a, b} because {a} is a subset of the
sort order of {a,b}, but;
SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ...
would just pick a plan ordered by {a} (we give precedence to aggregates
which are earlier in the targetlist).
SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ...
would choose to order by {b} since two aggregates suit that vs just one
that requires input ordered by {a}.
Author: David Rowley
Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane
Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
|
|
|
* List of Aggref exprs that this state value is for.
|
2020-11-24 09:45:00 +01:00
|
|
|
*
|
Improve performance of ORDER BY / DISTINCT aggregates
ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been
executed by always performing a sort in nodeAgg.c to sort the tuples in
the current group into the correct order before calling the transition
function on the sorted tuples. This was not great as often there might be
an index that could have provided pre-sorted input and allowed the
transition functions to be called as the rows come in, rather than having
to store them in a tuplestore in order to sort them once all the tuples
for the group have arrived.
Here we change the planner so it requests a path with a sort order which
supports the most amount of ORDER BY / DISTINCT aggregate functions and
add new code to the executor to allow it to support the processing of
ORDER BY / DISTINCT aggregates where the tuples are already sorted in the
correct order.
Since there can be many ORDER BY / DISTINCT aggregates in any given query
level, it's very possible that we can't find an order that suits all of
these aggregates. The sort order that the planner chooses is simply the
one that suits the most aggregate functions. We take the most strictly
sorted variation of each order and see how many aggregate functions can
use that, then we try again with the order of the remaining aggregates to
see if another order would suit more aggregate functions. For example:
SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ...
would request the sort order to be {a, b} because {a} is a subset of the
sort order of {a,b}, but;
SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ...
would just pick a plan ordered by {a} (we give precedence to aggregates
which are earlier in the targetlist).
SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ...
would choose to order by {b} since two aggregates suit that vs just one
that requires input ordered by {a}.
Author: David Rowley
Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane
Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
|
|
|
* There will always be at least one, but there can be multiple identical
|
|
|
|
* Aggref's sharing the same per-agg.
|
2020-11-24 09:45:00 +01:00
|
|
|
*/
|
Improve performance of ORDER BY / DISTINCT aggregates
ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been
executed by always performing a sort in nodeAgg.c to sort the tuples in
the current group into the correct order before calling the transition
function on the sorted tuples. This was not great as often there might be
an index that could have provided pre-sorted input and allowed the
transition functions to be called as the rows come in, rather than having
to store them in a tuplestore in order to sort them once all the tuples
for the group have arrived.
Here we change the planner so it requests a path with a sort order which
supports the most amount of ORDER BY / DISTINCT aggregate functions and
add new code to the executor to allow it to support the processing of
ORDER BY / DISTINCT aggregates where the tuples are already sorted in the
correct order.
Since there can be many ORDER BY / DISTINCT aggregates in any given query
level, it's very possible that we can't find an order that suits all of
these aggregates. The sort order that the planner chooses is simply the
one that suits the most aggregate functions. We take the most strictly
sorted variation of each order and see how many aggregate functions can
use that, then we try again with the order of the remaining aggregates to
see if another order would suit more aggregate functions. For example:
SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ...
would request the sort order to be {a, b} because {a} is a subset of the
sort order of {a,b}, but;
SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ...
would just pick a plan ordered by {a} (we give precedence to aggregates
which are earlier in the targetlist).
SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ...
would choose to order by {b} since two aggregates suit that vs just one
that requires input ordered by {a}.
Author: David Rowley
Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane
Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
2022-08-02 13:11:45 +02:00
|
|
|
List *aggrefs;
|
2020-11-24 09:45:00 +01:00
|
|
|
|
2022-07-19 18:29:37 +02:00
|
|
|
/* Transition state number for this aggregate */
|
2020-11-24 09:45:00 +01:00
|
|
|
int transno;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* "shareable" is false if this agg cannot share state values with other
|
|
|
|
* aggregates because the final function is read-write.
|
|
|
|
*/
|
|
|
|
bool shareable;
|
|
|
|
|
2022-07-19 18:29:37 +02:00
|
|
|
/* Oid of the final function, or InvalidOid if none */
|
2020-11-24 09:45:00 +01:00
|
|
|
Oid finalfn_oid;
|
|
|
|
} AggInfo;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AggTransInfo holds information about transition state that is used by one
|
|
|
|
* or more aggregates in the query. Multiple aggregates can share the same
|
|
|
|
* transition state, if they have the same inputs and the same transition
|
|
|
|
* function. Aggrefs that share the same transition info have the same
|
|
|
|
* 'aggtransno' value.
|
|
|
|
*/
|
|
|
|
typedef struct AggTransInfo
|
|
|
|
{
|
2023-02-13 01:07:33 +01:00
|
|
|
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
|
2022-07-19 18:29:37 +02:00
|
|
|
|
|
|
|
NodeTag type;
|
|
|
|
|
|
|
|
/* Inputs for this transition state */
|
2020-11-24 09:45:00 +01:00
|
|
|
List *args;
|
|
|
|
Expr *aggfilter;
|
|
|
|
|
|
|
|
/* Oid of the state transition function */
|
|
|
|
Oid transfn_oid;
|
|
|
|
|
2022-07-19 18:29:37 +02:00
|
|
|
/* Oid of the serialization function, or InvalidOid if none */
|
2020-11-24 09:45:00 +01:00
|
|
|
Oid serialfn_oid;
|
|
|
|
|
2022-07-19 18:29:37 +02:00
|
|
|
/* Oid of the deserialization function, or InvalidOid if none */
|
2020-11-24 09:45:00 +01:00
|
|
|
Oid deserialfn_oid;
|
|
|
|
|
2022-07-19 18:29:37 +02:00
|
|
|
/* Oid of the combine function, or InvalidOid if none */
|
2020-11-24 09:45:00 +01:00
|
|
|
Oid combinefn_oid;
|
|
|
|
|
|
|
|
/* Oid of state value's datatype */
|
|
|
|
Oid aggtranstype;
|
2022-07-19 18:29:37 +02:00
|
|
|
|
|
|
|
/* Additional data about transtype */
|
2020-11-24 09:45:00 +01:00
|
|
|
int32 aggtranstypmod;
|
|
|
|
int transtypeLen;
|
|
|
|
bool transtypeByVal;
|
2022-07-19 18:29:37 +02:00
|
|
|
|
|
|
|
/* Space-consumption estimate */
|
2020-11-24 09:45:00 +01:00
|
|
|
int32 aggtransspace;
|
|
|
|
|
2022-07-19 18:29:37 +02:00
|
|
|
/* Initial value from pg_aggregate entry */
|
|
|
|
Datum initValue pg_node_attr(read_write_ignore);
|
2020-11-24 09:45:00 +01:00
|
|
|
bool initValueIsNull;
|
|
|
|
} AggTransInfo;
|
|
|
|
|
2019-01-29 22:49:25 +01:00
|
|
|
#endif /* PATHNODES_H */
|