2017-11-15 16:23:28 +01:00
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
* execPartition.h
|
|
|
|
* POSTGRES partitioning executor interface
|
|
|
|
*
|
2018-01-03 05:30:12 +01:00
|
|
|
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
2017-11-15 16:23:28 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/include/executor/execPartition.h
|
|
|
|
*--------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef EXECPARTITION_H
|
|
|
|
#define EXECPARTITION_H
|
|
|
|
|
|
|
|
#include "nodes/execnodes.h"
|
|
|
|
#include "nodes/parsenodes.h"
|
|
|
|
#include "nodes/plannodes.h"
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
#include "partitioning/partprune.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*-----------------------
|
|
|
|
* PartitionDispatch - information about one partitioned table in a partition
|
|
|
|
* hierarchy required to route a tuple to one of its partitions
|
|
|
|
*
|
|
|
|
* reldesc Relation descriptor of the table
|
|
|
|
* key Partition key information of the table
|
|
|
|
* keystate Execution state required for expressions in the partition key
|
|
|
|
* partdesc Partition descriptor of the table
|
|
|
|
* tupslot A standalone TupleTableSlot initialized with this table's tuple
|
|
|
|
* descriptor
|
|
|
|
* tupmap TupleConversionMap to convert from the parent's rowtype to
|
|
|
|
* this table's rowtype (when extracting the partition key of a
|
|
|
|
* tuple just before routing it through this table)
|
|
|
|
* indexes Array with partdesc->nparts members (for details on what
|
|
|
|
* individual members represent, see how they are set in
|
|
|
|
* get_partition_dispatch_recurse())
|
|
|
|
*-----------------------
|
|
|
|
*/
|
|
|
|
typedef struct PartitionDispatchData
|
|
|
|
{
|
2017-11-29 15:24:24 +01:00
|
|
|
Relation reldesc;
|
|
|
|
PartitionKey key;
|
|
|
|
List *keystate; /* list of ExprState */
|
|
|
|
PartitionDesc partdesc;
|
2017-11-15 16:23:28 +01:00
|
|
|
TupleTableSlot *tupslot;
|
|
|
|
TupleConversionMap *tupmap;
|
2017-11-29 15:24:24 +01:00
|
|
|
int *indexes;
|
2017-11-15 16:23:28 +01:00
|
|
|
} PartitionDispatchData;
|
|
|
|
|
|
|
|
typedef struct PartitionDispatchData *PartitionDispatch;
|
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
/*-----------------------
|
|
|
|
* PartitionTupleRouting - Encapsulates all information required to execute
|
|
|
|
* tuple-routing between partitions.
|
|
|
|
*
|
|
|
|
* partition_dispatch_info Array of PartitionDispatch objects with one
|
|
|
|
* entry for every partitioned table in the
|
|
|
|
* partition tree.
|
|
|
|
* num_dispatch number of partitioned tables in the partition
|
|
|
|
* tree (= length of partition_dispatch_info[])
|
2018-02-26 21:05:46 +01:00
|
|
|
* partition_oids Array of leaf partitions OIDs with one entry
|
|
|
|
* for every leaf partition in the partition tree,
|
|
|
|
* initialized in full by
|
|
|
|
* ExecSetupPartitionTupleRouting.
|
2018-01-04 21:48:15 +01:00
|
|
|
* partitions Array of ResultRelInfo* objects with one entry
|
2018-02-26 21:05:46 +01:00
|
|
|
* for every leaf partition in the partition tree,
|
|
|
|
* initialized lazily by ExecInitPartitionInfo.
|
2018-01-04 21:48:15 +01:00
|
|
|
* num_partitions Number of leaf partitions in the partition tree
|
2018-02-26 21:05:46 +01:00
|
|
|
* (= 'partitions_oid'/'partitions' array length)
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
* parent_child_tupconv_maps Array of TupleConversionMap objects with one
|
2018-01-04 21:48:15 +01:00
|
|
|
* entry for every leaf partition (required to
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
* convert tuple from the root table's rowtype to
|
|
|
|
* a leaf partition's rowtype after tuple routing
|
|
|
|
* is done)
|
|
|
|
* child_parent_tupconv_maps Array of TupleConversionMap objects with one
|
|
|
|
* entry for every leaf partition (required to
|
|
|
|
* convert an updated tuple from the leaf
|
|
|
|
* partition's rowtype to the root table's rowtype
|
|
|
|
* so that tuple routing can be done)
|
|
|
|
* child_parent_map_not_required Array of bool. True value means that a map is
|
|
|
|
* determined to be not required for the given
|
|
|
|
* partition. False means either we haven't yet
|
|
|
|
* checked if a map is required, or it was
|
|
|
|
* determined to be required.
|
|
|
|
* subplan_partition_offsets Integer array ordered by UPDATE subplans. Each
|
|
|
|
* element of this array has the index into the
|
|
|
|
* corresponding partition in partitions array.
|
2018-01-24 22:34:51 +01:00
|
|
|
* num_subplan_partition_offsets Length of 'subplan_partition_offsets' array
|
2018-01-04 21:48:15 +01:00
|
|
|
* partition_tuple_slot TupleTableSlot to be used to manipulate any
|
|
|
|
* given leaf partition's rowtype after that
|
|
|
|
* partition is chosen for insertion by
|
|
|
|
* tuple-routing.
|
2018-04-26 20:54:46 +02:00
|
|
|
* root_tuple_slot TupleTableSlot to be used to transiently hold
|
|
|
|
* copy of a tuple that's being moved across
|
|
|
|
* partitions in the root partitioned table's
|
|
|
|
* rowtype
|
2018-01-04 21:48:15 +01:00
|
|
|
*-----------------------
|
|
|
|
*/
|
|
|
|
typedef struct PartitionTupleRouting
|
|
|
|
{
|
|
|
|
PartitionDispatch *partition_dispatch_info;
|
|
|
|
int num_dispatch;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
Oid *partition_oids;
|
2018-01-04 21:48:15 +01:00
|
|
|
ResultRelInfo **partitions;
|
|
|
|
int num_partitions;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
TupleConversionMap **parent_child_tupconv_maps;
|
|
|
|
TupleConversionMap **child_parent_tupconv_maps;
|
|
|
|
bool *child_parent_map_not_required;
|
|
|
|
int *subplan_partition_offsets;
|
2018-01-24 22:34:51 +01:00
|
|
|
int num_subplan_partition_offsets;
|
2018-01-04 21:48:15 +01:00
|
|
|
TupleTableSlot *partition_tuple_slot;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
TupleTableSlot *root_tuple_slot;
|
2018-01-04 21:48:15 +01:00
|
|
|
} PartitionTupleRouting;
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* PartitionedRelPruningData - Per-partitioned-table data for run-time pruning
|
2018-06-11 23:14:46 +02:00
|
|
|
* of partitions. For a multilevel partitioned table, we have one of these
|
2018-08-02 01:42:46 +02:00
|
|
|
* for the topmost partition plus one for each non-leaf child partition.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-11 23:14:46 +02:00
|
|
|
* subplan_map[] and subpart_map[] have the same definitions as in
|
2018-08-02 01:42:46 +02:00
|
|
|
* PartitionedRelPruneInfo (see plannodes.h); though note that here,
|
|
|
|
* subpart_map contains indexes into PartitionPruningData.partrelprunedata[].
|
2018-06-11 23:14:46 +02:00
|
|
|
*
|
|
|
|
* subplan_map Subplan index by partition index, or -1.
|
|
|
|
* subpart_map Subpart index by partition index, or -1.
|
2018-06-10 21:22:25 +02:00
|
|
|
* present_parts A Bitmapset of the partition indexes that we
|
2018-06-11 23:14:46 +02:00
|
|
|
* have subplans or subparts for.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
* context Contains the context details required to call
|
|
|
|
* the partition pruning code.
|
2018-06-10 21:22:25 +02:00
|
|
|
* pruning_steps List of PartitionPruneSteps used to
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
* perform the actual pruning.
|
2018-06-10 21:22:25 +02:00
|
|
|
* do_initial_prune true if pruning should be performed during
|
2018-06-11 23:14:46 +02:00
|
|
|
* executor startup (for this partitioning level).
|
2018-06-10 21:22:25 +02:00
|
|
|
* do_exec_prune true if pruning should be performed during
|
2018-06-11 23:14:46 +02:00
|
|
|
* executor run (for this partitioning level).
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
2018-08-02 01:42:46 +02:00
|
|
|
typedef struct PartitionedRelPruningData
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-06-11 00:24:34 +02:00
|
|
|
int *subplan_map;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
int *subpart_map;
|
|
|
|
Bitmapset *present_parts;
|
|
|
|
PartitionPruneContext context;
|
|
|
|
List *pruning_steps;
|
2018-06-10 21:22:25 +02:00
|
|
|
bool do_initial_prune;
|
|
|
|
bool do_exec_prune;
|
2018-08-02 01:42:46 +02:00
|
|
|
} PartitionedRelPruningData;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* PartitionPruningData - Holds all the run-time pruning information for
|
|
|
|
* a single partitioning hierarchy containing one or more partitions.
|
|
|
|
* partrelprunedata[] is an array ordered such that parents appear before
|
|
|
|
* their children; in particular, the first entry is the topmost partition,
|
|
|
|
* which was actually named in the SQL query.
|
|
|
|
*/
|
|
|
|
typedef struct PartitionPruningData
|
|
|
|
{
|
|
|
|
int num_partrelprunedata; /* number of array entries */
|
|
|
|
PartitionedRelPruningData partrelprunedata[FLEXIBLE_ARRAY_MEMBER];
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
} PartitionPruningData;
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
2018-06-11 00:24:34 +02:00
|
|
|
* PartitionPruneState - State object required for plan nodes to perform
|
2018-06-11 23:14:46 +02:00
|
|
|
* run-time partition pruning.
|
|
|
|
*
|
2018-06-11 00:24:34 +02:00
|
|
|
* This struct can be attached to plan types which support arbitrary Lists of
|
2018-08-02 01:42:46 +02:00
|
|
|
* subplans containing partitions, to allow subplans to be eliminated due to
|
2018-06-11 00:24:34 +02:00
|
|
|
* the clauses being unable to match to any tuple that the subplan could
|
2018-08-02 01:42:46 +02:00
|
|
|
* possibly produce.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-10 21:22:25 +02:00
|
|
|
* execparamids Contains paramids of PARAM_EXEC Params found within
|
2018-06-11 00:24:34 +02:00
|
|
|
* any of the partprunedata structs. Pruning must be
|
|
|
|
* done again each time the value of one of these
|
|
|
|
* parameters changes.
|
2018-08-02 01:42:46 +02:00
|
|
|
* other_subplans Contains indexes of subplans that don't belong to any
|
|
|
|
* "partprunedata", e.g UNION ALL children that are not
|
|
|
|
* partitioned tables, or a partitioned table that the
|
|
|
|
* planner deemed run-time pruning to be useless for.
|
|
|
|
* These must not be pruned.
|
2018-06-11 00:24:34 +02:00
|
|
|
* prune_context A short-lived memory context in which to execute the
|
|
|
|
* partition pruning functions.
|
2018-08-02 01:42:46 +02:00
|
|
|
* do_initial_prune true if pruning should be performed during executor
|
|
|
|
* startup (at any hierarchy level).
|
|
|
|
* do_exec_prune true if pruning should be performed during
|
|
|
|
* executor run (at any hierarchy level).
|
|
|
|
* num_partprunedata Number of items in "partprunedata" array.
|
|
|
|
* partprunedata Array of PartitionPruningData pointers for the plan's
|
|
|
|
* partitioned relation(s), one for each partitioning
|
|
|
|
* hierarchy that requires run-time pruning.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
typedef struct PartitionPruneState
|
|
|
|
{
|
2018-06-10 21:22:25 +02:00
|
|
|
Bitmapset *execparamids;
|
2018-08-02 01:42:46 +02:00
|
|
|
Bitmapset *other_subplans;
|
2018-06-11 00:24:34 +02:00
|
|
|
MemoryContext prune_context;
|
2018-08-02 01:42:46 +02:00
|
|
|
bool do_initial_prune;
|
|
|
|
bool do_exec_prune;
|
|
|
|
int num_partprunedata;
|
|
|
|
PartitionPruningData *partprunedata[FLEXIBLE_ARRAY_MEMBER];
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
} PartitionPruneState;
|
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
Relation rel);
|
2017-11-15 16:23:28 +01:00
|
|
|
extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
|
|
|
|
PartitionDispatch *pd,
|
|
|
|
TupleTableSlot *slot,
|
|
|
|
EState *estate);
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
extern ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
|
2018-04-26 20:47:16 +02:00
|
|
|
ResultRelInfo *resultRelInfo,
|
|
|
|
PartitionTupleRouting *proute,
|
|
|
|
EState *estate, int partidx);
|
2018-04-07 01:16:11 +02:00
|
|
|
extern void ExecInitRoutingInfo(ModifyTableState *mtstate,
|
|
|
|
EState *estate,
|
|
|
|
PartitionTupleRouting *proute,
|
|
|
|
ResultRelInfo *partRelInfo,
|
|
|
|
int partidx);
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute);
|
|
|
|
extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute,
|
|
|
|
ResultRelInfo *rootRelInfo, int leaf_index);
|
|
|
|
extern HeapTuple ConvertPartitionTupleSlot(TupleConversionMap *map,
|
|
|
|
HeapTuple tuple,
|
|
|
|
TupleTableSlot *new_slot,
|
2018-08-01 10:23:09 +02:00
|
|
|
TupleTableSlot **p_my_slot,
|
|
|
|
bool shouldFree);
|
2018-04-07 01:16:11 +02:00
|
|
|
extern void ExecCleanupTupleRouting(ModifyTableState *mtstate,
|
|
|
|
PartitionTupleRouting *proute);
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate,
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionPruneInfo *partitionpruneinfo);
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
extern void ExecDestroyPartitionPruneState(PartitionPruneState *prunestate);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate);
|
|
|
|
extern Bitmapset *ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate,
|
2018-06-11 00:24:34 +02:00
|
|
|
int nsubplans);
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
#endif /* EXECPARTITION_H */
|