2017-11-15 16:23:28 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* execPartition.c
|
|
|
|
* Support routines for partitioning.
|
|
|
|
*
|
2018-01-03 05:30:12 +01:00
|
|
|
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
2017-11-15 16:23:28 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/backend/executor/execPartition.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2018-04-15 02:12:14 +02:00
|
|
|
#include "catalog/partition.h"
|
2018-04-08 20:35:29 +02:00
|
|
|
#include "catalog/pg_inherits.h"
|
2018-03-26 15:43:54 +02:00
|
|
|
#include "catalog/pg_type.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
#include "executor/execPartition.h"
|
|
|
|
#include "executor/executor.h"
|
2018-04-07 01:16:11 +02:00
|
|
|
#include "foreign/fdwapi.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
#include "mb/pg_wchar.h"
|
|
|
|
#include "miscadmin.h"
|
2018-03-26 15:43:54 +02:00
|
|
|
#include "nodes/makefuncs.h"
|
2018-04-15 02:12:14 +02:00
|
|
|
#include "partitioning/partbounds.h"
|
|
|
|
#include "partitioning/partprune.h"
|
2018-04-16 20:50:57 +02:00
|
|
|
#include "rewrite/rewriteManip.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
#include "utils/lsyscache.h"
|
2018-04-15 02:12:14 +02:00
|
|
|
#include "utils/partcache.h"
|
|
|
|
#include "utils/rel.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
#include "utils/rls.h"
|
|
|
|
#include "utils/ruleutils.h"
|
|
|
|
|
2018-04-15 02:12:14 +02:00
|
|
|
|
2018-09-15 00:06:57 +02:00
|
|
|
/*-----------------------
|
|
|
|
* PartitionDispatch - information about one partitioned table in a partition
|
|
|
|
* hierarchy required to route a tuple to one of its partitions
|
|
|
|
*
|
|
|
|
* reldesc Relation descriptor of the table
|
|
|
|
* key Partition key information of the table
|
|
|
|
* keystate Execution state required for expressions in the partition key
|
|
|
|
* partdesc Partition descriptor of the table
|
|
|
|
* tupslot A standalone TupleTableSlot initialized with this table's tuple
|
|
|
|
* descriptor
|
|
|
|
* tupmap TupleConversionMap to convert from the parent's rowtype to
|
|
|
|
* this table's rowtype (when extracting the partition key of a
|
|
|
|
* tuple just before routing it through this table)
|
|
|
|
* indexes Array with partdesc->nparts members (for details on what
|
|
|
|
* individual members represent, see how they are set in
|
|
|
|
* get_partition_dispatch_recurse())
|
|
|
|
*-----------------------
|
|
|
|
*/
|
|
|
|
typedef struct PartitionDispatchData
|
|
|
|
{
|
|
|
|
Relation reldesc;
|
|
|
|
PartitionKey key;
|
|
|
|
List *keystate; /* list of ExprState */
|
|
|
|
PartitionDesc partdesc;
|
|
|
|
TupleTableSlot *tupslot;
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
AttrNumber *tupmap;
|
2018-09-15 00:06:57 +02:00
|
|
|
int *indexes;
|
|
|
|
} PartitionDispatchData;
|
|
|
|
|
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
static PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
|
|
|
|
int *num_parted, List **leaf_part_oids);
|
|
|
|
static void get_partition_dispatch_recurse(Relation rel, Relation parent,
|
|
|
|
List **pds, List **leaf_part_oids);
|
|
|
|
static void FormPartitionKeyDatum(PartitionDispatch pd,
|
|
|
|
TupleTableSlot *slot,
|
|
|
|
EState *estate,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull);
|
2018-07-27 15:34:57 +02:00
|
|
|
static int get_partition_for_tuple(PartitionDispatch pd, Datum *values,
|
2018-04-15 02:12:14 +02:00
|
|
|
bool *isnull);
|
2017-11-15 16:23:28 +01:00
|
|
|
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull,
|
|
|
|
int maxfieldlen);
|
2018-03-26 15:43:54 +02:00
|
|
|
static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map);
|
2018-08-02 01:42:46 +02:00
|
|
|
static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
|
|
|
|
PartitionedRelPruningData *pprune,
|
2018-06-10 21:22:25 +02:00
|
|
|
bool initial_prune,
|
|
|
|
Bitmapset **validsubplans);
|
2018-03-26 15:43:54 +02:00
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
2018-01-04 21:48:15 +01:00
|
|
|
* ExecSetupPartitionTupleRouting - sets up information needed during
|
|
|
|
* tuple routing for partitioned tables, encapsulates it in
|
|
|
|
* PartitionTupleRouting, and returns it.
|
2017-11-15 16:23:28 +01:00
|
|
|
*
|
|
|
|
* Note that all the relations in the partition tree are locked using the
|
|
|
|
* RowExclusiveLock mode upon return from this function.
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
*
|
|
|
|
* While we allocate the arrays of pointers of ResultRelInfo and
|
|
|
|
* TupleConversionMap for all partitions here, actual objects themselves are
|
|
|
|
* lazily allocated for a given partition if a tuple is actually routed to it;
|
|
|
|
* see ExecInitPartitionInfo. However, if the function is invoked for update
|
|
|
|
* tuple routing, caller would already have initialized ResultRelInfo's for
|
|
|
|
* some of the partitions, which are reused and assigned to their respective
|
2018-04-07 01:16:11 +02:00
|
|
|
* slot in the aforementioned array. For such partitions, we delay setting
|
|
|
|
* up objects such as TupleConversionMap until those are actually chosen as
|
|
|
|
* the partitions to route tuples to. See ExecPrepareTupleRouting.
|
2017-11-15 16:23:28 +01:00
|
|
|
*/
|
2018-01-04 21:48:15 +01:00
|
|
|
PartitionTupleRouting *
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
|
2017-11-15 16:23:28 +01:00
|
|
|
{
|
|
|
|
List *leaf_parts;
|
|
|
|
ListCell *cell;
|
|
|
|
int i;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
ResultRelInfo *update_rri = NULL;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
int num_update_rri = 0,
|
|
|
|
update_rri_index = 0;
|
2018-01-04 21:48:15 +01:00
|
|
|
PartitionTupleRouting *proute;
|
2018-03-26 15:43:54 +02:00
|
|
|
int nparts;
|
|
|
|
ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the information about the partition tree after locking all the
|
|
|
|
* partitions.
|
|
|
|
*/
|
|
|
|
(void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL);
|
2018-01-04 21:48:15 +01:00
|
|
|
proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
|
|
|
|
proute->partition_dispatch_info =
|
|
|
|
RelationGetPartitionDispatchInfo(rel, &proute->num_dispatch,
|
|
|
|
&leaf_parts);
|
2018-03-26 15:43:54 +02:00
|
|
|
proute->num_partitions = nparts = list_length(leaf_parts);
|
|
|
|
proute->partitions =
|
|
|
|
(ResultRelInfo **) palloc(nparts * sizeof(ResultRelInfo *));
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
proute->parent_child_tupconv_maps =
|
2018-03-26 15:43:54 +02:00
|
|
|
(TupleConversionMap **) palloc0(nparts * sizeof(TupleConversionMap *));
|
|
|
|
proute->partition_oids = (Oid *) palloc(nparts * sizeof(Oid));
|
2017-11-15 16:23:28 +01:00
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/* Set up details specific to the type of tuple routing we are doing. */
|
2018-03-26 15:43:54 +02:00
|
|
|
if (node && node->operation == CMD_UPDATE)
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
{
|
|
|
|
update_rri = mtstate->resultRelInfo;
|
|
|
|
num_update_rri = list_length(node->plans);
|
|
|
|
proute->subplan_partition_offsets =
|
|
|
|
palloc(num_update_rri * sizeof(int));
|
2018-01-24 22:34:51 +01:00
|
|
|
proute->num_subplan_partition_offsets = num_update_rri;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We need an additional tuple slot for storing transient tuples that
|
|
|
|
* are converted to the root table descriptor.
|
|
|
|
*/
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
proute->root_tuple_slot = MakeTupleTableSlot(RelationGetDescr(rel));
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
}
|
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
i = 0;
|
|
|
|
foreach(cell, leaf_parts)
|
|
|
|
{
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
ResultRelInfo *leaf_part_rri = NULL;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
Oid leaf_oid = lfirst_oid(cell);
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
proute->partition_oids[i] = leaf_oid;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
* If the leaf partition is already present in the per-subplan result
|
|
|
|
* rels, we re-use that rather than initialize a new result rel. The
|
|
|
|
* per-subplan resultrels and the resultrels of the leaf partitions
|
|
|
|
* are both in the same canonical order. So while going through the
|
|
|
|
* leaf partition oids, we need to keep track of the next per-subplan
|
|
|
|
* result rel to be looked for in the leaf partition resultrels.
|
2017-11-15 16:23:28 +01:00
|
|
|
*/
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
if (update_rri_index < num_update_rri &&
|
|
|
|
RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid)
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
{
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
leaf_part_rri = &update_rri[update_rri_index];
|
2018-02-08 20:29:05 +01:00
|
|
|
|
|
|
|
/*
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
* This is required in order to convert the partition's tuple to
|
|
|
|
* be compatible with the root partitioned table's tuple
|
|
|
|
* descriptor. When generating the per-subplan result rels, this
|
|
|
|
* was not set.
|
2018-02-08 20:29:05 +01:00
|
|
|
*/
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
leaf_part_rri->ri_PartitionRoot = rel;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/* Remember the subplan offset for this ResultRelInfo */
|
|
|
|
proute->subplan_partition_offsets[update_rri_index] = i;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
update_rri_index++;
|
|
|
|
}
|
2017-11-15 16:23:28 +01:00
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
proute->partitions[i] = leaf_part_rri;
|
2017-11-15 16:23:28 +01:00
|
|
|
i++;
|
|
|
|
}
|
2018-01-04 21:48:15 +01:00
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/*
|
|
|
|
* For UPDATE, we should have found all the per-subplan resultrels in the
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
* leaf partitions. (If this is an INSERT, both values will be zero.)
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
*/
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
Assert(update_rri_index == num_update_rri);
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
return proute;
|
2017-11-15 16:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExecFindPartition -- Find a leaf partition in the partition tree rooted
|
|
|
|
* at parent, for the heap tuple contained in *slot
|
|
|
|
*
|
|
|
|
* estate must be non-NULL; we'll need it to compute any expressions in the
|
|
|
|
* partition key(s)
|
|
|
|
*
|
|
|
|
* If no leaf partition is found, this routine errors out with the appropriate
|
|
|
|
* error message, else it returns the leaf partition sequence number
|
|
|
|
* as an index into the array of (ResultRelInfos of) all leaf partitions in
|
|
|
|
* the partition tree.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
|
|
|
|
TupleTableSlot *slot, EState *estate)
|
|
|
|
{
|
|
|
|
int result;
|
|
|
|
Datum values[PARTITION_MAX_KEYS];
|
|
|
|
bool isnull[PARTITION_MAX_KEYS];
|
|
|
|
Relation rel;
|
2018-08-01 21:06:47 +02:00
|
|
|
PartitionDispatch dispatch;
|
2017-11-15 16:23:28 +01:00
|
|
|
ExprContext *ecxt = GetPerTupleExprContext(estate);
|
|
|
|
TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
|
2018-08-01 21:06:47 +02:00
|
|
|
TupleTableSlot *myslot = NULL;
|
|
|
|
MemoryContext oldcxt;
|
|
|
|
|
|
|
|
/* use per-tuple context here to avoid leaking memory */
|
|
|
|
oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* First check the root table's partition constraint, if any. No point in
|
|
|
|
* routing the tuple if it doesn't belong in the root table itself.
|
|
|
|
*/
|
2018-06-11 22:53:33 +02:00
|
|
|
if (resultRelInfo->ri_PartitionCheck)
|
|
|
|
ExecPartitionCheck(resultRelInfo, slot, estate, true);
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/* start with the root partitioned table */
|
2018-08-01 21:06:47 +02:00
|
|
|
dispatch = pd[0];
|
2017-11-15 16:23:28 +01:00
|
|
|
while (true)
|
|
|
|
{
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
AttrNumber *map = dispatch->tupmap;
|
2017-11-29 15:24:24 +01:00
|
|
|
int cur_index = -1;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
2018-08-01 21:06:47 +02:00
|
|
|
rel = dispatch->reldesc;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
2018-08-01 21:06:47 +02:00
|
|
|
* Convert the tuple to this parent's layout, if different from the
|
|
|
|
* current relation.
|
2017-11-15 16:23:28 +01:00
|
|
|
*/
|
2018-08-01 21:06:47 +02:00
|
|
|
myslot = dispatch->tupslot;
|
2017-11-15 16:23:28 +01:00
|
|
|
if (myslot != NULL && map != NULL)
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
slot = execute_attr_map_slot(map, slot, myslot);
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Extract partition key from tuple. Expression evaluation machinery
|
|
|
|
* that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
|
|
|
|
* point to the correct tuple slot. The slot might have changed from
|
|
|
|
* what was used for the parent table if the table of the current
|
|
|
|
* partitioning level has different tuple descriptor from the parent.
|
|
|
|
* So update ecxt_scantuple accordingly.
|
|
|
|
*/
|
|
|
|
ecxt->ecxt_scantuple = slot;
|
2018-08-01 21:06:47 +02:00
|
|
|
FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);
|
2017-12-01 16:01:50 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Nothing for get_partition_for_tuple() to do if there are no
|
|
|
|
* partitions to begin with.
|
|
|
|
*/
|
2018-08-01 21:06:47 +02:00
|
|
|
if (dispatch->partdesc->nparts == 0)
|
2017-12-01 16:01:50 +01:00
|
|
|
{
|
|
|
|
result = -1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2018-08-01 21:06:47 +02:00
|
|
|
cur_index = get_partition_for_tuple(dispatch, values, isnull);
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* cur_index < 0 means we failed to find a partition of this parent.
|
|
|
|
* cur_index >= 0 means we either found the leaf partition, or the
|
|
|
|
* next parent to find a partition of.
|
|
|
|
*/
|
|
|
|
if (cur_index < 0)
|
|
|
|
{
|
|
|
|
result = -1;
|
|
|
|
break;
|
|
|
|
}
|
2018-08-01 21:06:47 +02:00
|
|
|
else if (dispatch->indexes[cur_index] >= 0)
|
2017-11-15 16:23:28 +01:00
|
|
|
{
|
2018-08-01 21:06:47 +02:00
|
|
|
result = dispatch->indexes[cur_index];
|
|
|
|
/* success! */
|
2017-11-15 16:23:28 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
2018-08-01 21:06:47 +02:00
|
|
|
{
|
|
|
|
/* move down one level */
|
|
|
|
dispatch = pd[-dispatch->indexes[cur_index]];
|
|
|
|
}
|
2017-11-15 16:23:28 +01:00
|
|
|
}
|
|
|
|
|
2018-08-01 21:06:47 +02:00
|
|
|
/* Release the tuple in the lowest parent's dedicated slot. */
|
|
|
|
if (slot == myslot)
|
|
|
|
ExecClearTuple(myslot);
|
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
/* A partition was not found. */
|
|
|
|
if (result < 0)
|
|
|
|
{
|
|
|
|
char *val_desc;
|
|
|
|
|
|
|
|
val_desc = ExecBuildSlotPartitionKeyDescription(rel,
|
|
|
|
values, isnull, 64);
|
|
|
|
Assert(OidIsValid(RelationGetRelid(rel)));
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CHECK_VIOLATION),
|
|
|
|
errmsg("no partition of relation \"%s\" found for row",
|
|
|
|
RelationGetRelationName(rel)),
|
|
|
|
val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0));
|
|
|
|
}
|
|
|
|
|
2018-08-01 21:06:47 +02:00
|
|
|
MemoryContextSwitchTo(oldcxt);
|
2017-11-15 16:23:28 +01:00
|
|
|
ecxt->ecxt_scantuple = ecxt_scantuple_old;
|
2018-08-01 21:06:47 +02:00
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/*
|
|
|
|
* ExecInitPartitionInfo
|
2018-04-25 04:00:48 +02:00
|
|
|
* Initialize ResultRelInfo and other information for a partition
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
*
|
|
|
|
* Returns the ResultRelInfo
|
|
|
|
*/
|
|
|
|
ResultRelInfo *
|
|
|
|
ExecInitPartitionInfo(ModifyTableState *mtstate,
|
|
|
|
ResultRelInfo *resultRelInfo,
|
|
|
|
PartitionTupleRouting *proute,
|
|
|
|
EState *estate, int partidx)
|
|
|
|
{
|
2018-04-07 01:16:11 +02:00
|
|
|
ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
Relation rootrel = resultRelInfo->ri_RelationDesc,
|
|
|
|
partrel;
|
2018-04-16 20:50:57 +02:00
|
|
|
Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
ResultRelInfo *leaf_part_rri;
|
|
|
|
MemoryContext oldContext;
|
2018-04-16 20:50:57 +02:00
|
|
|
AttrNumber *part_attnos = NULL;
|
|
|
|
bool found_whole_row;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We locked all the partitions in ExecSetupPartitionTupleRouting
|
|
|
|
* including the leaf partitions.
|
|
|
|
*/
|
|
|
|
partrel = heap_open(proute->partition_oids[partidx], NoLock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Keep ResultRelInfo and other information for this partition in the
|
|
|
|
* per-query memory context so they'll survive throughout the query.
|
|
|
|
*/
|
|
|
|
oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
|
|
|
2018-03-26 15:43:54 +02:00
|
|
|
leaf_part_rri = makeNode(ResultRelInfo);
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
InitResultRelInfo(leaf_part_rri,
|
|
|
|
partrel,
|
|
|
|
node ? node->nominalRelation : 1,
|
|
|
|
rootrel,
|
|
|
|
estate->es_instrument);
|
|
|
|
|
Fix interaction of foreign tuple routing with remote triggers.
Without these fixes, changes to the inserted tuple made by remote
triggers are ignored when building local RETURNING tuples.
In the core code, call ExecInitRoutingInfo at a later point from
within ExecInitPartitionInfo so that the FDW callback gets invoked
after the returning list has been built. But move CheckValidResultRel
out of ExecInitRoutingInfo so that it can happen at an earlier stage.
In postgres_fdw, refactor assorted deparsing functions to work with
the RTE rather than the PlannerInfo, which saves us having to
construct a fake PlannerInfo in cases where we don't have a real one.
Then, we can pass down a constructed RTE that yields the correct
deparse result when no real one exists. Unfortunately, this
necessitates a hack that understands how the core code manages RT
indexes for update tuple routing, which is ugly, but we don't have a
better idea right now.
Original report, analysis, and patch by Etsuro Fujita. Heavily
refactored by me. Then worked over some more by Amit Langote.
Discussion: http://postgr.es/m/5AD4882B.10002@lab.ntt.co.jp
2018-05-01 19:21:46 +02:00
|
|
|
/*
|
|
|
|
* Verify result relation is a valid target for an INSERT. An UPDATE of a
|
|
|
|
* partition-key becomes a DELETE+INSERT operation, so this check is still
|
|
|
|
* required when the operation is CMD_UPDATE.
|
|
|
|
*/
|
|
|
|
CheckValidResultRel(leaf_part_rri, CMD_INSERT);
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/*
|
|
|
|
* Since we've just initialized this ResultRelInfo, it's not in any list
|
|
|
|
* attached to the estate as yet. Add it, so that it can be found later.
|
|
|
|
*
|
|
|
|
* Note that the entries in this list appear in no predetermined order,
|
|
|
|
* because partition result rels are initialized as and when they're
|
|
|
|
* needed.
|
|
|
|
*/
|
|
|
|
estate->es_tuple_routing_result_relations =
|
|
|
|
lappend(estate->es_tuple_routing_result_relations,
|
|
|
|
leaf_part_rri);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Open partition indices. The user may have asked to check for conflicts
|
|
|
|
* within this leaf partition and do "nothing" instead of throwing an
|
|
|
|
* error. Be prepared in that case by initializing the index information
|
|
|
|
* needed by ExecInsert() to perform speculative insertions.
|
|
|
|
*/
|
|
|
|
if (partrel->rd_rel->relhasindex &&
|
|
|
|
leaf_part_rri->ri_IndexRelationDescs == NULL)
|
|
|
|
ExecOpenIndices(leaf_part_rri,
|
2018-03-19 22:09:43 +01:00
|
|
|
(node != NULL &&
|
|
|
|
node->onConflictAction != ONCONFLICT_NONE));
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Build WITH CHECK OPTION constraints for the partition. Note that we
|
|
|
|
* didn't build the withCheckOptionList for partitions within the planner,
|
|
|
|
* but simple translation of varattnos will suffice. This only occurs for
|
|
|
|
* the INSERT case or in the case of UPDATE tuple routing where we didn't
|
|
|
|
* find a result rel to reuse in ExecSetupPartitionTupleRouting().
|
|
|
|
*/
|
|
|
|
if (node && node->withCheckOptionLists != NIL)
|
|
|
|
{
|
|
|
|
List *wcoList;
|
|
|
|
List *wcoExprs = NIL;
|
|
|
|
ListCell *ll;
|
|
|
|
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the case of INSERT on a partitioned table, there is only one
|
|
|
|
* plan. Likewise, there is only one WCO list, not one per partition.
|
|
|
|
* For UPDATE, there are as many WCO lists as there are plans.
|
|
|
|
*/
|
|
|
|
Assert((node->operation == CMD_INSERT &&
|
|
|
|
list_length(node->withCheckOptionLists) == 1 &&
|
|
|
|
list_length(node->plans) == 1) ||
|
|
|
|
(node->operation == CMD_UPDATE &&
|
|
|
|
list_length(node->withCheckOptionLists) ==
|
|
|
|
list_length(node->plans)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the WCO list of the first plan as a reference to calculate
|
|
|
|
* attno's for the WCO list of this partition. In the INSERT case,
|
|
|
|
* that refers to the root partitioned table, whereas in the UPDATE
|
|
|
|
* tuple routing case, that refers to the first partition in the
|
|
|
|
* mtstate->resultRelInfo array. In any case, both that relation and
|
|
|
|
* this partition should have the same columns, so we should be able
|
|
|
|
* to map attributes successfully.
|
|
|
|
*/
|
|
|
|
wcoList = linitial(node->withCheckOptionLists);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert Vars in it to contain this partition's attribute numbers.
|
|
|
|
*/
|
2018-04-19 21:46:53 +02:00
|
|
|
part_attnos =
|
|
|
|
convert_tuples_by_name_map(RelationGetDescr(partrel),
|
|
|
|
RelationGetDescr(firstResultRel),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
wcoList = (List *)
|
|
|
|
map_variable_attnos((Node *) wcoList,
|
|
|
|
firstVarno, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
2018-04-16 20:50:57 +02:00
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
foreach(ll, wcoList)
|
|
|
|
{
|
|
|
|
WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
|
|
|
|
ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
|
2018-03-06 02:49:59 +01:00
|
|
|
&mtstate->ps);
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
|
|
|
wcoExprs = lappend(wcoExprs, wcoExpr);
|
|
|
|
}
|
|
|
|
|
|
|
|
leaf_part_rri->ri_WithCheckOptions = wcoList;
|
|
|
|
leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build the RETURNING projection for the partition. Note that we didn't
|
|
|
|
* build the returningList for partitions within the planner, but simple
|
|
|
|
* translation of varattnos will suffice. This only occurs for the INSERT
|
|
|
|
* case or in the case of UPDATE tuple routing where we didn't find a
|
|
|
|
* result rel to reuse in ExecSetupPartitionTupleRouting().
|
|
|
|
*/
|
|
|
|
if (node && node->returningLists != NIL)
|
|
|
|
{
|
|
|
|
TupleTableSlot *slot;
|
|
|
|
ExprContext *econtext;
|
|
|
|
List *returningList;
|
|
|
|
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
|
|
|
|
|
|
|
|
/* See the comment above for WCO lists. */
|
|
|
|
Assert((node->operation == CMD_INSERT &&
|
|
|
|
list_length(node->returningLists) == 1 &&
|
|
|
|
list_length(node->plans) == 1) ||
|
|
|
|
(node->operation == CMD_UPDATE &&
|
|
|
|
list_length(node->returningLists) ==
|
|
|
|
list_length(node->plans)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the RETURNING list of the first plan as a reference to
|
|
|
|
* calculate attno's for the RETURNING list of this partition. See
|
|
|
|
* the comment above for WCO lists for more details on why this is
|
|
|
|
* okay.
|
|
|
|
*/
|
|
|
|
returningList = linitial(node->returningLists);
|
|
|
|
|
2018-04-19 21:46:53 +02:00
|
|
|
/*
|
|
|
|
* Convert Vars in it to contain this partition's attribute numbers.
|
|
|
|
*/
|
|
|
|
if (part_attnos == NULL)
|
|
|
|
part_attnos =
|
|
|
|
convert_tuples_by_name_map(RelationGetDescr(partrel),
|
|
|
|
RelationGetDescr(firstResultRel),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
returningList = (List *)
|
|
|
|
map_variable_attnos((Node *) returningList,
|
|
|
|
firstVarno, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
2018-04-16 20:50:57 +02:00
|
|
|
|
2018-04-07 01:16:11 +02:00
|
|
|
leaf_part_rri->ri_returningList = returningList;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize the projection itself.
|
|
|
|
*
|
|
|
|
* Use the slot and the expression context that would have been set up
|
|
|
|
* in ExecInitModifyTable() for projection's output.
|
|
|
|
*/
|
|
|
|
Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
|
|
|
|
slot = mtstate->ps.ps_ResultTupleSlot;
|
|
|
|
Assert(mtstate->ps.ps_ExprContext != NULL);
|
|
|
|
econtext = mtstate->ps.ps_ExprContext;
|
|
|
|
leaf_part_rri->ri_projectReturning =
|
|
|
|
ExecBuildProjectionInfo(returningList, econtext, slot,
|
|
|
|
&mtstate->ps, RelationGetDescr(partrel));
|
|
|
|
}
|
|
|
|
|
Fix interaction of foreign tuple routing with remote triggers.
Without these fixes, changes to the inserted tuple made by remote
triggers are ignored when building local RETURNING tuples.
In the core code, call ExecInitRoutingInfo at a later point from
within ExecInitPartitionInfo so that the FDW callback gets invoked
after the returning list has been built. But move CheckValidResultRel
out of ExecInitRoutingInfo so that it can happen at an earlier stage.
In postgres_fdw, refactor assorted deparsing functions to work with
the RTE rather than the PlannerInfo, which saves us having to
construct a fake PlannerInfo in cases where we don't have a real one.
Then, we can pass down a constructed RTE that yields the correct
deparse result when no real one exists. Unfortunately, this
necessitates a hack that understands how the core code manages RT
indexes for update tuple routing, which is ugly, but we don't have a
better idea right now.
Original report, analysis, and patch by Etsuro Fujita. Heavily
refactored by me. Then worked over some more by Amit Langote.
Discussion: http://postgr.es/m/5AD4882B.10002@lab.ntt.co.jp
2018-05-01 19:21:46 +02:00
|
|
|
/* Set up information needed for routing tuples to the partition. */
|
|
|
|
ExecInitRoutingInfo(mtstate, estate, proute, leaf_part_rri, partidx);
|
|
|
|
|
2018-03-26 15:43:54 +02:00
|
|
|
/*
|
|
|
|
* If there is an ON CONFLICT clause, initialize state for it.
|
|
|
|
*/
|
|
|
|
if (node && node->onConflictAction != ONCONFLICT_NONE)
|
|
|
|
{
|
|
|
|
TupleConversionMap *map = proute->parent_child_tupconv_maps[partidx];
|
|
|
|
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
|
|
|
|
TupleDesc partrelDesc = RelationGetDescr(partrel);
|
|
|
|
ExprContext *econtext = mtstate->ps.ps_ExprContext;
|
|
|
|
ListCell *lc;
|
|
|
|
List *arbiterIndexes = NIL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there is a list of arbiter indexes, map it to a list of indexes
|
|
|
|
* in the partition. We do that by scanning the partition's index
|
|
|
|
* list and searching for ancestry relationships to each index in the
|
|
|
|
* ancestor table.
|
|
|
|
*/
|
|
|
|
if (list_length(resultRelInfo->ri_onConflictArbiterIndexes) > 0)
|
|
|
|
{
|
|
|
|
List *childIdxs;
|
|
|
|
|
|
|
|
childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);
|
|
|
|
|
|
|
|
foreach(lc, childIdxs)
|
|
|
|
{
|
|
|
|
Oid childIdx = lfirst_oid(lc);
|
|
|
|
List *ancestors;
|
|
|
|
ListCell *lc2;
|
|
|
|
|
|
|
|
ancestors = get_partition_ancestors(childIdx);
|
|
|
|
foreach(lc2, resultRelInfo->ri_onConflictArbiterIndexes)
|
|
|
|
{
|
|
|
|
if (list_member_oid(ancestors, lfirst_oid(lc2)))
|
|
|
|
arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
|
|
|
|
}
|
|
|
|
list_free(ancestors);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the resulting lists are of inequal length, something is wrong.
|
|
|
|
* (This shouldn't happen, since arbiter index selection should not
|
|
|
|
* pick up an invalid index.)
|
|
|
|
*/
|
|
|
|
if (list_length(resultRelInfo->ri_onConflictArbiterIndexes) !=
|
|
|
|
list_length(arbiterIndexes))
|
|
|
|
elog(ERROR, "invalid arbiter index list");
|
|
|
|
leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the DO UPDATE case, we have some more state to initialize.
|
|
|
|
*/
|
|
|
|
if (node->onConflictAction == ONCONFLICT_UPDATE)
|
|
|
|
{
|
|
|
|
Assert(node->onConflictSet != NIL);
|
|
|
|
Assert(resultRelInfo->ri_onConflict != NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the partition's tuple descriptor matches exactly the root
|
|
|
|
* parent (the common case), we can simply re-use the parent's ON
|
|
|
|
* CONFLICT SET state, skipping a bunch of work. Otherwise, we
|
|
|
|
* need to create state specific to this partition.
|
|
|
|
*/
|
|
|
|
if (map == NULL)
|
|
|
|
leaf_part_rri->ri_onConflict = resultRelInfo->ri_onConflict;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
List *onconflset;
|
|
|
|
TupleDesc tupDesc;
|
|
|
|
bool found_whole_row;
|
|
|
|
|
|
|
|
leaf_part_rri->ri_onConflict = makeNode(OnConflictSetState);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Translate expressions in onConflictSet to account for
|
|
|
|
* different attribute numbers. For that, map partition
|
|
|
|
* varattnos twice: first to catch the EXCLUDED
|
|
|
|
* pseudo-relation (INNER_VAR), and second to handle the main
|
|
|
|
* target relation (firstVarno).
|
|
|
|
*/
|
|
|
|
onconflset = (List *) copyObject((Node *) node->onConflictSet);
|
2018-04-19 21:46:53 +02:00
|
|
|
if (part_attnos == NULL)
|
|
|
|
part_attnos =
|
|
|
|
convert_tuples_by_name_map(RelationGetDescr(partrel),
|
|
|
|
RelationGetDescr(firstResultRel),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
onconflset = (List *)
|
|
|
|
map_variable_attnos((Node *) onconflset,
|
|
|
|
INNER_VAR, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
|
|
|
onconflset = (List *)
|
|
|
|
map_variable_attnos((Node *) onconflset,
|
|
|
|
firstVarno, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
|
|
|
|
|
|
|
/* Finally, adjust this tlist to match the partition. */
|
|
|
|
onconflset = adjust_partition_tlist(onconflset, map);
|
2018-03-26 15:43:54 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Build UPDATE SET's projection info. The user of this
|
|
|
|
* projection is responsible for setting the slot's tupdesc!
|
|
|
|
* We set aside a tupdesc that's good for the common case of a
|
|
|
|
* partition that's tupdesc-equal to the partitioned table;
|
|
|
|
* partitions of different tupdescs must generate their own.
|
|
|
|
*/
|
|
|
|
tupDesc = ExecTypeFromTL(onconflset, partrelDesc->tdhasoid);
|
|
|
|
ExecSetSlotDescriptor(mtstate->mt_conflproj, tupDesc);
|
|
|
|
leaf_part_rri->ri_onConflict->oc_ProjInfo =
|
|
|
|
ExecBuildProjectionInfo(onconflset, econtext,
|
|
|
|
mtstate->mt_conflproj,
|
|
|
|
&mtstate->ps, partrelDesc);
|
|
|
|
leaf_part_rri->ri_onConflict->oc_ProjTupdesc = tupDesc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there is a WHERE clause, initialize state where it will
|
|
|
|
* be evaluated, mapping the attribute numbers appropriately.
|
|
|
|
* As with onConflictSet, we need to map partition varattnos
|
|
|
|
* to the partition's tupdesc.
|
|
|
|
*/
|
|
|
|
if (node->onConflictWhere)
|
|
|
|
{
|
|
|
|
List *clause;
|
|
|
|
|
|
|
|
clause = copyObject((List *) node->onConflictWhere);
|
2018-04-19 21:46:53 +02:00
|
|
|
clause = (List *)
|
|
|
|
map_variable_attnos((Node *) clause,
|
|
|
|
INNER_VAR, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
|
|
|
clause = (List *)
|
|
|
|
map_variable_attnos((Node *) clause,
|
|
|
|
firstVarno, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
2018-03-26 15:43:54 +02:00
|
|
|
leaf_part_rri->ri_onConflict->oc_WhereClause =
|
|
|
|
ExecInitQual((List *) clause, &mtstate->ps);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Assert(proute->partitions[partidx] == NULL);
|
|
|
|
proute->partitions[partidx] = leaf_part_rri;
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
MemoryContextSwitchTo(oldContext);
|
|
|
|
|
|
|
|
return leaf_part_rri;
|
|
|
|
}
|
|
|
|
|
2018-04-07 01:16:11 +02:00
|
|
|
/*
|
|
|
|
* ExecInitRoutingInfo
|
Fix interaction of foreign tuple routing with remote triggers.
Without these fixes, changes to the inserted tuple made by remote
triggers are ignored when building local RETURNING tuples.
In the core code, call ExecInitRoutingInfo at a later point from
within ExecInitPartitionInfo so that the FDW callback gets invoked
after the returning list has been built. But move CheckValidResultRel
out of ExecInitRoutingInfo so that it can happen at an earlier stage.
In postgres_fdw, refactor assorted deparsing functions to work with
the RTE rather than the PlannerInfo, which saves us having to
construct a fake PlannerInfo in cases where we don't have a real one.
Then, we can pass down a constructed RTE that yields the correct
deparse result when no real one exists. Unfortunately, this
necessitates a hack that understands how the core code manages RT
indexes for update tuple routing, which is ugly, but we don't have a
better idea right now.
Original report, analysis, and patch by Etsuro Fujita. Heavily
refactored by me. Then worked over some more by Amit Langote.
Discussion: http://postgr.es/m/5AD4882B.10002@lab.ntt.co.jp
2018-05-01 19:21:46 +02:00
|
|
|
* Set up information needed for routing tuples to a leaf partition
|
2018-04-07 01:16:11 +02:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
ExecInitRoutingInfo(ModifyTableState *mtstate,
|
|
|
|
EState *estate,
|
|
|
|
PartitionTupleRouting *proute,
|
|
|
|
ResultRelInfo *partRelInfo,
|
|
|
|
int partidx)
|
|
|
|
{
|
|
|
|
MemoryContext oldContext;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Switch into per-query memory context.
|
|
|
|
*/
|
|
|
|
oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set up a tuple conversion map to convert a tuple routed to the
|
|
|
|
* partition from the parent's type to the partition's.
|
|
|
|
*/
|
|
|
|
proute->parent_child_tupconv_maps[partidx] =
|
|
|
|
convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_PartitionRoot),
|
|
|
|
RelationGetDescr(partRelInfo->ri_RelationDesc),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
/*
|
|
|
|
* If a partition has a different rowtype than the root parent, initialize
|
|
|
|
* a slot dedicated to storing this partition's tuples. The slot is used
|
|
|
|
* for various operations that are applied to tuples after routing, such
|
|
|
|
* as checking constraints.
|
|
|
|
*/
|
|
|
|
if (proute->parent_child_tupconv_maps[partidx] != NULL)
|
|
|
|
{
|
|
|
|
Relation partrel = partRelInfo->ri_RelationDesc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize the array in proute where these slots are stored, if not
|
|
|
|
* already done.
|
|
|
|
*/
|
|
|
|
if (proute->partition_tuple_slots == NULL)
|
|
|
|
proute->partition_tuple_slots = (TupleTableSlot **)
|
|
|
|
palloc0(proute->num_partitions *
|
|
|
|
sizeof(TupleTableSlot *));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize the slot itself setting its descriptor to this
|
|
|
|
* partition's TupleDesc; TupleDesc reference will be released at the
|
|
|
|
* end of the command.
|
|
|
|
*/
|
|
|
|
proute->partition_tuple_slots[partidx] =
|
|
|
|
ExecInitExtraTupleSlot(estate,
|
|
|
|
RelationGetDescr(partrel));
|
|
|
|
}
|
|
|
|
|
2018-04-07 01:16:11 +02:00
|
|
|
/*
|
|
|
|
* If the partition is a foreign table, let the FDW init itself for
|
|
|
|
* routing tuples to the partition.
|
|
|
|
*/
|
|
|
|
if (partRelInfo->ri_FdwRoutine != NULL &&
|
|
|
|
partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
|
|
|
|
partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldContext);
|
|
|
|
|
|
|
|
partRelInfo->ri_PartitionReadyForRouting = true;
|
|
|
|
}
|
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/*
|
|
|
|
* ExecSetupChildParentMapForLeaf -- Initialize the per-leaf-partition
|
|
|
|
* child-to-root tuple conversion map array.
|
|
|
|
*
|
|
|
|
* This map is required for capturing transition tuples when the target table
|
|
|
|
* is a partitioned table. For a tuple that is routed by an INSERT or UPDATE,
|
|
|
|
* we need to convert it from the leaf partition to the target table
|
|
|
|
* descriptor.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute)
|
|
|
|
{
|
|
|
|
Assert(proute != NULL);
|
|
|
|
|
|
|
|
/*
|
2018-02-06 21:50:13 +01:00
|
|
|
* These array elements get filled up with maps on an on-demand basis.
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
* Initially just set all of them to NULL.
|
|
|
|
*/
|
|
|
|
proute->child_parent_tupconv_maps =
|
|
|
|
(TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) *
|
|
|
|
proute->num_partitions);
|
|
|
|
|
|
|
|
/* Same is the case for this array. All the values are set to false */
|
|
|
|
proute->child_parent_map_not_required =
|
|
|
|
(bool *) palloc0(sizeof(bool) * proute->num_partitions);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* TupConvMapForLeaf -- Get the tuple conversion map for a given leaf partition
|
|
|
|
* index.
|
|
|
|
*/
|
|
|
|
TupleConversionMap *
|
|
|
|
TupConvMapForLeaf(PartitionTupleRouting *proute,
|
|
|
|
ResultRelInfo *rootRelInfo, int leaf_index)
|
|
|
|
{
|
|
|
|
ResultRelInfo **resultRelInfos = proute->partitions;
|
|
|
|
TupleConversionMap **map;
|
|
|
|
TupleDesc tupdesc;
|
|
|
|
|
|
|
|
/* Don't call this if we're not supposed to be using this type of map. */
|
|
|
|
Assert(proute->child_parent_tupconv_maps != NULL);
|
|
|
|
|
|
|
|
/* If it's already known that we don't need a map, return NULL. */
|
|
|
|
if (proute->child_parent_map_not_required[leaf_index])
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* If we've already got a map, return it. */
|
|
|
|
map = &proute->child_parent_tupconv_maps[leaf_index];
|
|
|
|
if (*map != NULL)
|
|
|
|
return *map;
|
|
|
|
|
|
|
|
/* No map yet; try to create one. */
|
|
|
|
tupdesc = RelationGetDescr(resultRelInfos[leaf_index]->ri_RelationDesc);
|
|
|
|
*map =
|
|
|
|
convert_tuples_by_name(tupdesc,
|
|
|
|
RelationGetDescr(rootRelInfo->ri_RelationDesc),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
|
|
|
|
/* If it turns out no map is needed, remember for next time. */
|
|
|
|
proute->child_parent_map_not_required[leaf_index] = (*map == NULL);
|
|
|
|
|
|
|
|
return *map;
|
|
|
|
}
|
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
/*
|
|
|
|
* ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
|
|
|
|
* routing.
|
|
|
|
*
|
|
|
|
* Close all the partitioned tables, leaf partitions, and their indices.
|
|
|
|
*/
|
|
|
|
void
|
2018-04-07 01:16:11 +02:00
|
|
|
ExecCleanupTupleRouting(ModifyTableState *mtstate,
|
|
|
|
PartitionTupleRouting *proute)
|
2018-01-04 21:48:15 +01:00
|
|
|
{
|
|
|
|
int i;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
int subplan_index = 0;
|
2018-01-04 21:48:15 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Remember, proute->partition_dispatch_info[0] corresponds to the root
|
|
|
|
* partitioned table, which we must not try to close, because it is the
|
|
|
|
* main target table of the query that will be closed by callers such as
|
|
|
|
* ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
|
|
|
|
* partitioned table.
|
|
|
|
*/
|
|
|
|
for (i = 1; i < proute->num_dispatch; i++)
|
|
|
|
{
|
|
|
|
PartitionDispatch pd = proute->partition_dispatch_info[i];
|
|
|
|
|
|
|
|
heap_close(pd->reldesc, NoLock);
|
|
|
|
ExecDropSingleTupleTableSlot(pd->tupslot);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < proute->num_partitions; i++)
|
|
|
|
{
|
|
|
|
ResultRelInfo *resultRelInfo = proute->partitions[i];
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/* skip further processsing for uninitialized partitions */
|
|
|
|
if (resultRelInfo == NULL)
|
|
|
|
continue;
|
|
|
|
|
2018-04-07 01:16:11 +02:00
|
|
|
/* Allow any FDWs to shut down if they've been exercised */
|
|
|
|
if (resultRelInfo->ri_PartitionReadyForRouting &&
|
|
|
|
resultRelInfo->ri_FdwRoutine != NULL &&
|
|
|
|
resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
|
|
|
|
resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
|
|
|
|
resultRelInfo);
|
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/*
|
|
|
|
* If this result rel is one of the UPDATE subplan result rels, let
|
|
|
|
* ExecEndPlan() close it. For INSERT or COPY,
|
|
|
|
* proute->subplan_partition_offsets will always be NULL. Note that
|
|
|
|
* the subplan_partition_offsets array and the partitions array have
|
|
|
|
* the partitions in the same order. So, while we iterate over
|
|
|
|
* partitions array, we also iterate over the
|
|
|
|
* subplan_partition_offsets array in order to figure out which of the
|
|
|
|
* result rels are present in the UPDATE subplans.
|
|
|
|
*/
|
|
|
|
if (proute->subplan_partition_offsets &&
|
2018-01-24 22:34:51 +01:00
|
|
|
subplan_index < proute->num_subplan_partition_offsets &&
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
proute->subplan_partition_offsets[subplan_index] == i)
|
|
|
|
{
|
|
|
|
subplan_index++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
ExecCloseIndices(resultRelInfo);
|
|
|
|
heap_close(resultRelInfo->ri_RelationDesc, NoLock);
|
|
|
|
}
|
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/* Release the standalone partition tuple descriptors, if any */
|
|
|
|
if (proute->root_tuple_slot)
|
|
|
|
ExecDropSingleTupleTableSlot(proute->root_tuple_slot);
|
2018-01-04 21:48:15 +01:00
|
|
|
}
|
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
/*
|
|
|
|
* RelationGetPartitionDispatchInfo
|
|
|
|
* Returns information necessary to route tuples down a partition tree
|
|
|
|
*
|
|
|
|
* The number of elements in the returned array (that is, the number of
|
|
|
|
* PartitionDispatch objects for the partitioned tables in the partition tree)
|
|
|
|
* is returned in *num_parted and a list of the OIDs of all the leaf
|
|
|
|
* partitions of rel is returned in *leaf_part_oids.
|
|
|
|
*
|
|
|
|
* All the relations in the partition tree (including 'rel') must have been
|
|
|
|
* locked (using at least the AccessShareLock) by the caller.
|
|
|
|
*/
|
|
|
|
static PartitionDispatch *
|
|
|
|
RelationGetPartitionDispatchInfo(Relation rel,
|
|
|
|
int *num_parted, List **leaf_part_oids)
|
|
|
|
{
|
|
|
|
List *pdlist = NIL;
|
|
|
|
PartitionDispatchData **pd;
|
|
|
|
ListCell *lc;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
|
|
|
|
|
|
|
|
*num_parted = 0;
|
|
|
|
*leaf_part_oids = NIL;
|
|
|
|
|
|
|
|
get_partition_dispatch_recurse(rel, NULL, &pdlist, leaf_part_oids);
|
|
|
|
*num_parted = list_length(pdlist);
|
|
|
|
pd = (PartitionDispatchData **) palloc(*num_parted *
|
|
|
|
sizeof(PartitionDispatchData *));
|
|
|
|
i = 0;
|
|
|
|
foreach(lc, pdlist)
|
|
|
|
{
|
|
|
|
pd[i++] = lfirst(lc);
|
|
|
|
}
|
|
|
|
|
|
|
|
return pd;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get_partition_dispatch_recurse
|
|
|
|
* Recursively expand partition tree rooted at rel
|
|
|
|
*
|
|
|
|
* As the partition tree is expanded in a depth-first manner, we maintain two
|
|
|
|
* global lists: of PartitionDispatch objects corresponding to partitioned
|
|
|
|
* tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
|
|
|
|
*
|
|
|
|
* Note that the order of OIDs of leaf partitions in leaf_part_oids matches
|
|
|
|
* the order in which the planner's expand_partitioned_rtentry() processes
|
|
|
|
* them. It's not necessarily the case that the offsets match up exactly,
|
|
|
|
* because constraint exclusion might prune away some partitions on the
|
|
|
|
* planner side, whereas we'll always have the complete list; but unpruned
|
|
|
|
* partitions will appear in the same order in the plan as they are returned
|
|
|
|
* here.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
get_partition_dispatch_recurse(Relation rel, Relation parent,
|
|
|
|
List **pds, List **leaf_part_oids)
|
|
|
|
{
|
|
|
|
TupleDesc tupdesc = RelationGetDescr(rel);
|
|
|
|
PartitionDesc partdesc = RelationGetPartitionDesc(rel);
|
|
|
|
PartitionKey partkey = RelationGetPartitionKey(rel);
|
|
|
|
PartitionDispatch pd;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
check_stack_depth();
|
|
|
|
|
|
|
|
/* Build a PartitionDispatch for this table and add it to *pds. */
|
|
|
|
pd = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
|
|
|
|
*pds = lappend(*pds, pd);
|
|
|
|
pd->reldesc = rel;
|
|
|
|
pd->key = partkey;
|
|
|
|
pd->keystate = NIL;
|
|
|
|
pd->partdesc = partdesc;
|
|
|
|
if (parent != NULL)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* For every partitioned table other than the root, we must store a
|
|
|
|
* tuple table slot initialized with its tuple descriptor and a tuple
|
|
|
|
* conversion map to convert a tuple from its parent's rowtype to its
|
|
|
|
* own. That is to make sure that we are looking at the correct row
|
|
|
|
* using the correct tuple descriptor when computing its partition key
|
|
|
|
* for tuple routing.
|
|
|
|
*/
|
|
|
|
pd->tupslot = MakeSingleTupleTableSlot(tupdesc);
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
pd->tupmap = convert_tuples_by_name_map_if_req(RelationGetDescr(parent),
|
|
|
|
tupdesc,
|
|
|
|
gettext_noop("could not convert row type"));
|
2017-11-15 16:23:28 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Not required for the root partitioned table */
|
|
|
|
pd->tupslot = NULL;
|
|
|
|
pd->tupmap = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go look at each partition of this table. If it's a leaf partition,
|
|
|
|
* simply add its OID to *leaf_part_oids. If it's a partitioned table,
|
|
|
|
* recursively call get_partition_dispatch_recurse(), so that its
|
|
|
|
* partitions are processed as well and a corresponding PartitionDispatch
|
|
|
|
* object gets added to *pds.
|
|
|
|
*
|
2018-05-16 16:46:55 +02:00
|
|
|
* The 'indexes' array is used when searching for a partition matching a
|
|
|
|
* given tuple. The actual value we store here depends on whether the
|
|
|
|
* array element belongs to a leaf partition or a subpartitioned table.
|
2018-05-18 22:09:14 +02:00
|
|
|
* For leaf partitions we store the index into *leaf_part_oids, and for
|
|
|
|
* sub-partitioned tables we store a negative version of the index into
|
|
|
|
* the *pds list. Both indexes are 0-based, but the first element of the
|
|
|
|
* *pds list is the root partition, so 0 always means the first leaf. When
|
|
|
|
* searching, if we see a negative value, the search must continue in the
|
|
|
|
* corresponding sub-partition; otherwise, we've identified the correct
|
|
|
|
* partition.
|
2017-11-15 16:23:28 +01:00
|
|
|
*/
|
|
|
|
pd->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
|
|
|
|
for (i = 0; i < partdesc->nparts; i++)
|
|
|
|
{
|
|
|
|
Oid partrelid = partdesc->oids[i];
|
|
|
|
|
|
|
|
if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
|
|
|
|
{
|
|
|
|
*leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
|
|
|
|
pd->indexes[i] = list_length(*leaf_part_oids) - 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We assume all tables in the partition tree were already locked
|
|
|
|
* by the caller.
|
|
|
|
*/
|
|
|
|
Relation partrel = heap_open(partrelid, NoLock);
|
|
|
|
|
|
|
|
pd->indexes[i] = -list_length(*pds);
|
|
|
|
get_partition_dispatch_recurse(partrel, rel, pds, leaf_part_oids);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------------
|
|
|
|
* FormPartitionKeyDatum
|
|
|
|
* Construct values[] and isnull[] arrays for the partition key
|
|
|
|
* of a tuple.
|
|
|
|
*
|
|
|
|
* pd Partition dispatch object of the partitioned table
|
|
|
|
* slot Heap tuple from which to extract partition key
|
|
|
|
* estate executor state for evaluating any partition key
|
|
|
|
* expressions (must be non-NULL)
|
|
|
|
* values Array of partition key Datums (output area)
|
|
|
|
* isnull Array of is-null indicators (output area)
|
|
|
|
*
|
|
|
|
* the ecxt_scantuple slot of estate's per-tuple expr context must point to
|
|
|
|
* the heap tuple passed in.
|
|
|
|
* ----------------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
FormPartitionKeyDatum(PartitionDispatch pd,
|
|
|
|
TupleTableSlot *slot,
|
|
|
|
EState *estate,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull)
|
|
|
|
{
|
|
|
|
ListCell *partexpr_item;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (pd->key->partexprs != NIL && pd->keystate == NIL)
|
|
|
|
{
|
|
|
|
/* Check caller has set up context correctly */
|
|
|
|
Assert(estate != NULL &&
|
|
|
|
GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
|
|
|
|
|
|
|
|
/* First time through, set up expression evaluation state */
|
|
|
|
pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
|
|
|
|
}
|
|
|
|
|
|
|
|
partexpr_item = list_head(pd->keystate);
|
|
|
|
for (i = 0; i < pd->key->partnatts; i++)
|
|
|
|
{
|
|
|
|
AttrNumber keycol = pd->key->partattrs[i];
|
|
|
|
Datum datum;
|
|
|
|
bool isNull;
|
|
|
|
|
|
|
|
if (keycol != 0)
|
|
|
|
{
|
|
|
|
/* Plain column; get the value directly from the heap tuple */
|
|
|
|
datum = slot_getattr(slot, keycol, &isNull);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Expression; need to evaluate it */
|
|
|
|
if (partexpr_item == NULL)
|
|
|
|
elog(ERROR, "wrong number of partition key expressions");
|
|
|
|
datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
|
|
|
|
GetPerTupleExprContext(estate),
|
|
|
|
&isNull);
|
|
|
|
partexpr_item = lnext(partexpr_item);
|
|
|
|
}
|
|
|
|
values[i] = datum;
|
|
|
|
isnull[i] = isNull;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (partexpr_item != NULL)
|
|
|
|
elog(ERROR, "wrong number of partition key expressions");
|
|
|
|
}
|
|
|
|
|
2018-04-15 02:12:14 +02:00
|
|
|
/*
|
|
|
|
* get_partition_for_tuple
|
|
|
|
* Finds partition of relation which accepts the partition key specified
|
|
|
|
* in values and isnull
|
|
|
|
*
|
|
|
|
* Return value is index of the partition (>= 0 and < partdesc->nparts) if one
|
|
|
|
* found or -1 if none found.
|
|
|
|
*/
|
2018-04-21 17:21:08 +02:00
|
|
|
static int
|
2018-07-27 15:34:57 +02:00
|
|
|
get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
|
2018-04-15 02:12:14 +02:00
|
|
|
{
|
|
|
|
int bound_offset;
|
|
|
|
int part_index = -1;
|
2018-07-27 15:34:57 +02:00
|
|
|
PartitionKey key = pd->key;
|
|
|
|
PartitionDesc partdesc = pd->partdesc;
|
2018-06-13 19:18:02 +02:00
|
|
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
2018-04-15 02:12:14 +02:00
|
|
|
|
|
|
|
/* Route as appropriate based on partitioning strategy. */
|
|
|
|
switch (key->strategy)
|
|
|
|
{
|
|
|
|
case PARTITION_STRATEGY_HASH:
|
|
|
|
{
|
2018-06-13 19:18:02 +02:00
|
|
|
int greatest_modulus;
|
|
|
|
uint64 rowHash;
|
|
|
|
|
|
|
|
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
|
|
|
|
rowHash = compute_partition_hash_value(key->partnatts,
|
|
|
|
key->partsupfunc,
|
|
|
|
values, isnull);
|
2018-04-15 02:12:14 +02:00
|
|
|
|
|
|
|
part_index = boundinfo->indexes[rowHash % greatest_modulus];
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PARTITION_STRATEGY_LIST:
|
|
|
|
if (isnull[0])
|
|
|
|
{
|
2018-06-13 19:18:02 +02:00
|
|
|
if (partition_bound_accepts_nulls(boundinfo))
|
|
|
|
part_index = boundinfo->null_index;
|
2018-04-15 02:12:14 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
bool equal = false;
|
|
|
|
|
|
|
|
bound_offset = partition_list_bsearch(key->partsupfunc,
|
|
|
|
key->partcollation,
|
2018-06-13 19:18:02 +02:00
|
|
|
boundinfo,
|
2018-04-15 02:12:14 +02:00
|
|
|
values[0], &equal);
|
|
|
|
if (bound_offset >= 0 && equal)
|
2018-06-13 19:18:02 +02:00
|
|
|
part_index = boundinfo->indexes[bound_offset];
|
2018-04-15 02:12:14 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
|
|
{
|
|
|
|
bool equal = false,
|
|
|
|
range_partkey_has_null = false;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* No range includes NULL, so this will be accepted by the
|
|
|
|
* default partition if there is one, and otherwise rejected.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < key->partnatts; i++)
|
|
|
|
{
|
|
|
|
if (isnull[i])
|
|
|
|
{
|
|
|
|
range_partkey_has_null = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!range_partkey_has_null)
|
|
|
|
{
|
|
|
|
bound_offset = partition_range_datum_bsearch(key->partsupfunc,
|
|
|
|
key->partcollation,
|
2018-06-13 19:18:02 +02:00
|
|
|
boundinfo,
|
2018-04-15 02:12:14 +02:00
|
|
|
key->partnatts,
|
|
|
|
values,
|
|
|
|
&equal);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The bound at bound_offset is less than or equal to the
|
|
|
|
* tuple value, so the bound at offset+1 is the upper
|
|
|
|
* bound of the partition we're looking for, if there
|
|
|
|
* actually exists one.
|
|
|
|
*/
|
2018-06-13 19:18:02 +02:00
|
|
|
part_index = boundinfo->indexes[bound_offset + 1];
|
2018-04-15 02:12:14 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
elog(ERROR, "unexpected partition strategy: %d",
|
|
|
|
(int) key->strategy);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* part_index < 0 means we failed to find a partition of this parent. Use
|
|
|
|
* the default partition, if there is one.
|
|
|
|
*/
|
|
|
|
if (part_index < 0)
|
2018-06-13 19:18:02 +02:00
|
|
|
part_index = boundinfo->default_index;
|
2018-04-15 02:12:14 +02:00
|
|
|
|
|
|
|
return part_index;
|
|
|
|
}
|
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
/*
|
2017-11-28 20:17:21 +01:00
|
|
|
* ExecBuildSlotPartitionKeyDescription
|
2017-11-15 16:23:28 +01:00
|
|
|
*
|
|
|
|
* This works very much like BuildIndexValueDescription() and is currently
|
|
|
|
* used for building error messages when ExecFindPartition() fails to find
|
|
|
|
* partition for a row.
|
|
|
|
*/
|
|
|
|
static char *
|
|
|
|
ExecBuildSlotPartitionKeyDescription(Relation rel,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull,
|
|
|
|
int maxfieldlen)
|
|
|
|
{
|
|
|
|
StringInfoData buf;
|
|
|
|
PartitionKey key = RelationGetPartitionKey(rel);
|
|
|
|
int partnatts = get_partition_natts(key);
|
|
|
|
int i;
|
|
|
|
Oid relid = RelationGetRelid(rel);
|
|
|
|
AclResult aclresult;
|
|
|
|
|
|
|
|
if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* If the user has table-level access, just go build the description. */
|
|
|
|
aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
|
|
|
|
if (aclresult != ACLCHECK_OK)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Step through the columns of the partition key and make sure the
|
|
|
|
* user has SELECT rights on all of them.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < partnatts; i++)
|
|
|
|
{
|
|
|
|
AttrNumber attnum = get_partition_col_attnum(key, i);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this partition key column is an expression, we return no
|
|
|
|
* detail rather than try to figure out what column(s) the
|
|
|
|
* expression includes and if the user has SELECT rights on them.
|
|
|
|
*/
|
|
|
|
if (attnum == InvalidAttrNumber ||
|
|
|
|
pg_attribute_aclcheck(relid, attnum, GetUserId(),
|
|
|
|
ACL_SELECT) != ACLCHECK_OK)
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
initStringInfo(&buf);
|
|
|
|
appendStringInfo(&buf, "(%s) = (",
|
|
|
|
pg_get_partkeydef_columns(relid, true));
|
|
|
|
|
|
|
|
for (i = 0; i < partnatts; i++)
|
|
|
|
{
|
|
|
|
char *val;
|
|
|
|
int vallen;
|
|
|
|
|
|
|
|
if (isnull[i])
|
|
|
|
val = "null";
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Oid foutoid;
|
|
|
|
bool typisvarlena;
|
|
|
|
|
|
|
|
getTypeOutputInfo(get_partition_col_typid(key, i),
|
|
|
|
&foutoid, &typisvarlena);
|
|
|
|
val = OidOutputFunctionCall(foutoid, values[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i > 0)
|
|
|
|
appendStringInfoString(&buf, ", ");
|
|
|
|
|
|
|
|
/* truncate if needed */
|
|
|
|
vallen = strlen(val);
|
|
|
|
if (vallen <= maxfieldlen)
|
|
|
|
appendStringInfoString(&buf, val);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
vallen = pg_mbcliplen(val, vallen, maxfieldlen);
|
|
|
|
appendBinaryStringInfo(&buf, val, vallen);
|
|
|
|
appendStringInfoString(&buf, "...");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
appendStringInfoChar(&buf, ')');
|
|
|
|
|
|
|
|
return buf.data;
|
|
|
|
}
|
2018-03-26 15:43:54 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* adjust_partition_tlist
|
|
|
|
* Adjust the targetlist entries for a given partition to account for
|
|
|
|
* attribute differences between parent and the partition
|
|
|
|
*
|
|
|
|
* The expressions have already been fixed, but here we fix the list to make
|
|
|
|
* target resnos match the partition's attribute numbers. This results in a
|
|
|
|
* copy of the original target list in which the entries appear in resno
|
|
|
|
* order, including both the existing entries (that may have their resno
|
|
|
|
* changed in-place) and the newly added entries for columns that don't exist
|
|
|
|
* in the parent.
|
|
|
|
*
|
|
|
|
* Scribbles on the input tlist, so callers must make sure to make a copy
|
|
|
|
* before passing it to us.
|
|
|
|
*/
|
|
|
|
static List *
|
|
|
|
adjust_partition_tlist(List *tlist, TupleConversionMap *map)
|
|
|
|
{
|
|
|
|
List *new_tlist = NIL;
|
|
|
|
TupleDesc tupdesc = map->outdesc;
|
|
|
|
AttrNumber *attrMap = map->attrMap;
|
|
|
|
AttrNumber attrno;
|
|
|
|
|
|
|
|
for (attrno = 1; attrno <= tupdesc->natts; attrno++)
|
|
|
|
{
|
|
|
|
Form_pg_attribute att_tup = TupleDescAttr(tupdesc, attrno - 1);
|
|
|
|
TargetEntry *tle;
|
|
|
|
|
|
|
|
if (attrMap[attrno - 1] != InvalidAttrNumber)
|
|
|
|
{
|
|
|
|
Assert(!att_tup->attisdropped);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the corresponding entry from the parent's tlist, adjusting
|
|
|
|
* the resno the match the partition's attno.
|
|
|
|
*/
|
|
|
|
tle = (TargetEntry *) list_nth(tlist, attrMap[attrno - 1] - 1);
|
|
|
|
tle->resno = attrno;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Const *expr;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For a dropped attribute in the partition, generate a dummy
|
|
|
|
* entry with resno matching the partition's attno.
|
|
|
|
*/
|
|
|
|
Assert(att_tup->attisdropped);
|
|
|
|
expr = makeConst(INT4OID,
|
|
|
|
-1,
|
|
|
|
InvalidOid,
|
|
|
|
sizeof(int32),
|
|
|
|
(Datum) 0,
|
|
|
|
true, /* isnull */
|
|
|
|
true /* byval */ );
|
|
|
|
tle = makeTargetEntry((Expr *) expr,
|
|
|
|
attrno,
|
|
|
|
pstrdup(NameStr(att_tup->attname)),
|
|
|
|
false);
|
|
|
|
}
|
|
|
|
|
|
|
|
new_tlist = lappend(new_tlist, tle);
|
|
|
|
}
|
|
|
|
|
|
|
|
return new_tlist;
|
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
* Run-Time Partition Pruning Support.
|
|
|
|
*
|
|
|
|
* The following series of functions exist to support the removal of unneeded
|
2018-06-11 00:24:34 +02:00
|
|
|
* subplans for queries against partitioned tables. The supporting functions
|
|
|
|
* here are designed to work with any plan type which supports an arbitrary
|
|
|
|
* number of subplans, e.g. Append, MergeAppend.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-10 21:22:25 +02:00
|
|
|
* When pruning involves comparison of a partition key to a constant, it's
|
|
|
|
* done by the planner. However, if we have a comparison to a non-constant
|
|
|
|
* but not volatile expression, that presents an opportunity for run-time
|
|
|
|
* pruning by the executor, allowing irrelevant partitions to be skipped
|
|
|
|
* dynamically.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-10 21:22:25 +02:00
|
|
|
* We must distinguish expressions containing PARAM_EXEC Params from
|
|
|
|
* expressions that don't contain those. Even though a PARAM_EXEC Param is
|
2018-06-11 00:24:34 +02:00
|
|
|
* considered to be a stable expression, it can change value from one plan
|
|
|
|
* node scan to the next during query execution. Stable comparison
|
|
|
|
* expressions that don't involve such Params allow partition pruning to be
|
|
|
|
* done once during executor startup. Expressions that do involve such Params
|
|
|
|
* require us to prune separately for each scan of the parent plan node.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-11 00:24:34 +02:00
|
|
|
* Note that pruning away unneeded subplans during executor startup has the
|
|
|
|
* added benefit of not having to initialize the unneeded subplans at all.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* Functions:
|
|
|
|
*
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
* ExecCreatePartitionPruneState:
|
2018-06-11 00:24:34 +02:00
|
|
|
* Creates the PartitionPruneState required by each of the two pruning
|
|
|
|
* functions. Details stored include how to map the partition index
|
|
|
|
* returned by the partition pruning code into subplan indexes.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
* ExecDestroyPartitionPruneState:
|
|
|
|
* Deletes a PartitionPruneState. Must be called during executor shutdown.
|
|
|
|
*
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
* ExecFindInitialMatchingSubPlans:
|
2018-06-11 00:24:34 +02:00
|
|
|
* Returns indexes of matching subplans. Partition pruning is attempted
|
2018-06-10 21:22:25 +02:00
|
|
|
* without any evaluation of expressions containing PARAM_EXEC Params.
|
2018-06-11 00:24:34 +02:00
|
|
|
* This function must be called during executor startup for the parent
|
|
|
|
* plan before the subplans themselves are initialized. Subplans which
|
|
|
|
* are found not to match by this function must be removed from the
|
|
|
|
* plan's list of subplans during execution, as this function performs a
|
|
|
|
* remap of the partition index to subplan index map and the newly
|
|
|
|
* created map provides indexes only for subplans which remain after
|
|
|
|
* calling this function.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
|
|
|
* ExecFindMatchingSubPlans:
|
2018-06-11 00:24:34 +02:00
|
|
|
* Returns indexes of matching subplans after evaluating all available
|
|
|
|
* expressions. This function can only be called during execution and
|
|
|
|
* must be called again each time the value of a Param listed in
|
|
|
|
* PartitionPruneState's 'execparamids' changes.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
* ExecCreatePartitionPruneState
|
|
|
|
* Build the data structure required for calling
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
* ExecFindInitialMatchingSubPlans and ExecFindMatchingSubPlans.
|
|
|
|
*
|
2018-06-11 00:24:34 +02:00
|
|
|
* 'planstate' is the parent plan node's execution state.
|
|
|
|
*
|
2018-08-02 01:42:46 +02:00
|
|
|
* 'partitionpruneinfo' is a PartitionPruneInfo as generated by
|
2018-06-11 00:24:34 +02:00
|
|
|
* make_partition_pruneinfo. Here we build a PartitionPruneState containing a
|
2018-08-02 01:42:46 +02:00
|
|
|
* PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
|
|
|
|
* partitionpruneinfo->prune_infos), each of which contains a
|
|
|
|
* PartitionedRelPruningData for each PartitionedRelPruneInfo appearing in
|
|
|
|
* that sublist. This two-level system is needed to keep from confusing the
|
|
|
|
* different hierarchies when a UNION ALL contains multiple partitioned tables
|
|
|
|
* as children. The data stored in each PartitionedRelPruningData can be
|
|
|
|
* re-used each time we re-evaluate which partitions match the pruning steps
|
|
|
|
* provided in each PartitionedRelPruneInfo.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
PartitionPruneState *
|
2018-08-02 01:42:46 +02:00
|
|
|
ExecCreatePartitionPruneState(PlanState *planstate,
|
|
|
|
PartitionPruneInfo *partitionpruneinfo)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
|
|
|
PartitionPruneState *prunestate;
|
2018-08-02 01:42:46 +02:00
|
|
|
int n_part_hierarchies;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
ListCell *lc;
|
|
|
|
int i;
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
n_part_hierarchies = list_length(partitionpruneinfo->prune_infos);
|
|
|
|
Assert(n_part_hierarchies > 0);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-06-11 00:24:34 +02:00
|
|
|
/*
|
|
|
|
* Allocate the data structure
|
|
|
|
*/
|
2018-08-02 01:42:46 +02:00
|
|
|
prunestate = (PartitionPruneState *)
|
|
|
|
palloc(offsetof(PartitionPruneState, partprunedata) +
|
|
|
|
sizeof(PartitionPruningData *) * n_part_hierarchies);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
prunestate->execparamids = NULL;
|
|
|
|
/* other_subplans can change at runtime, so we need our own copy */
|
|
|
|
prunestate->other_subplans = bms_copy(partitionpruneinfo->other_subplans);
|
2018-06-10 21:22:25 +02:00
|
|
|
prunestate->do_initial_prune = false; /* may be set below */
|
|
|
|
prunestate->do_exec_prune = false; /* may be set below */
|
2018-08-02 01:42:46 +02:00
|
|
|
prunestate->num_partprunedata = n_part_hierarchies;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
/*
|
2018-06-11 00:24:34 +02:00
|
|
|
* Create a short-term memory context which we'll use when making calls to
|
|
|
|
* the partition pruning functions. This avoids possible memory leaks,
|
|
|
|
* since the pruning functions call comparison functions that aren't under
|
|
|
|
* our control.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
prunestate->prune_context =
|
|
|
|
AllocSetContextCreate(CurrentMemoryContext,
|
|
|
|
"Partition Prune",
|
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
|
|
|
|
i = 0;
|
2018-08-02 01:42:46 +02:00
|
|
|
foreach(lc, partitionpruneinfo->prune_infos)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
List *partrelpruneinfos = lfirst_node(List, lc);
|
|
|
|
int npartrelpruneinfos = list_length(partrelpruneinfos);
|
|
|
|
PartitionPruningData *prunedata;
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
ListCell *lc2;
|
2018-08-02 01:42:46 +02:00
|
|
|
int j;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
prunedata = (PartitionPruningData *)
|
|
|
|
palloc(offsetof(PartitionPruningData, partrelprunedata) +
|
|
|
|
npartrelpruneinfos * sizeof(PartitionedRelPruningData));
|
|
|
|
prunestate->partprunedata[i] = prunedata;
|
|
|
|
prunedata->num_partrelprunedata = npartrelpruneinfos;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
j = 0;
|
|
|
|
foreach(lc2, partrelpruneinfos)
|
2018-04-24 19:03:10 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2);
|
|
|
|
PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
|
|
|
|
PartitionPruneContext *context = &pprune->context;
|
|
|
|
PartitionDesc partdesc;
|
|
|
|
PartitionKey partkey;
|
|
|
|
int partnatts;
|
|
|
|
int n_steps;
|
2018-04-24 19:03:10 +02:00
|
|
|
ListCell *lc3;
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* We must copy the subplan_map rather than pointing directly to
|
|
|
|
* the plan's version, as we may end up making modifications to it
|
|
|
|
* later.
|
|
|
|
*/
|
|
|
|
pprune->subplan_map = palloc(sizeof(int) * pinfo->nparts);
|
|
|
|
memcpy(pprune->subplan_map, pinfo->subplan_map,
|
|
|
|
sizeof(int) * pinfo->nparts);
|
2018-04-24 19:03:10 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* We can use the subpart_map verbatim, since we never modify it */
|
|
|
|
pprune->subpart_map = pinfo->subpart_map;
|
2018-04-24 19:03:10 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* present_parts is also subject to later modification */
|
|
|
|
pprune->present_parts = bms_copy(pinfo->present_parts);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to hold a pin on the partitioned table's relcache entry
|
|
|
|
* so that we can rely on its copies of the table's partition key
|
|
|
|
* and partition descriptor. We need not get a lock though; one
|
|
|
|
* should have been acquired already by InitPlan or
|
|
|
|
* ExecLockNonLeafAppendTables.
|
|
|
|
*/
|
|
|
|
context->partrel = relation_open(pinfo->reloid, NoLock);
|
|
|
|
|
|
|
|
partkey = RelationGetPartitionKey(context->partrel);
|
|
|
|
partdesc = RelationGetPartitionDesc(context->partrel);
|
|
|
|
n_steps = list_length(pinfo->pruning_steps);
|
|
|
|
|
|
|
|
context->strategy = partkey->strategy;
|
|
|
|
context->partnatts = partnatts = partkey->partnatts;
|
|
|
|
context->nparts = pinfo->nparts;
|
|
|
|
context->boundinfo = partdesc->boundinfo;
|
|
|
|
context->partcollation = partkey->partcollation;
|
|
|
|
context->partsupfunc = partkey->partsupfunc;
|
|
|
|
|
|
|
|
/* We'll look up type-specific support functions as needed */
|
|
|
|
context->stepcmpfuncs = (FmgrInfo *)
|
|
|
|
palloc0(sizeof(FmgrInfo) * n_steps * partnatts);
|
|
|
|
|
|
|
|
context->ppccontext = CurrentMemoryContext;
|
|
|
|
context->planstate = planstate;
|
|
|
|
|
|
|
|
/* Initialize expression state for each expression we need */
|
|
|
|
context->exprstates = (ExprState **)
|
|
|
|
palloc0(sizeof(ExprState *) * n_steps * partnatts);
|
|
|
|
foreach(lc3, pinfo->pruning_steps)
|
2018-04-24 19:03:10 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc3);
|
|
|
|
ListCell *lc4;
|
|
|
|
int keyno;
|
|
|
|
|
|
|
|
/* not needed for other step kinds */
|
|
|
|
if (!IsA(step, PartitionPruneStepOp))
|
|
|
|
continue;
|
2018-04-24 19:03:10 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
Assert(list_length(step->exprs) <= partnatts);
|
|
|
|
|
|
|
|
keyno = 0;
|
|
|
|
foreach(lc4, step->exprs)
|
2018-04-24 19:03:10 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
Expr *expr = (Expr *) lfirst(lc4);
|
|
|
|
|
|
|
|
/* not needed for Consts */
|
|
|
|
if (!IsA(expr, Const))
|
|
|
|
{
|
|
|
|
int stateidx = PruneCxtStateIdx(partnatts,
|
|
|
|
step->step.step_id,
|
|
|
|
keyno);
|
2018-06-10 21:22:25 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
context->exprstates[stateidx] =
|
|
|
|
ExecInitExpr(expr, context->planstate);
|
|
|
|
}
|
|
|
|
keyno++;
|
2018-04-24 19:03:10 +02:00
|
|
|
}
|
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* Array is not modified at runtime, so just point to plan's copy */
|
|
|
|
context->exprhasexecparam = pinfo->hasexecparam;
|
2018-06-10 21:22:25 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
pprune->pruning_steps = pinfo->pruning_steps;
|
|
|
|
pprune->do_initial_prune = pinfo->do_initial_prune;
|
|
|
|
pprune->do_exec_prune = pinfo->do_exec_prune;
|
2018-06-10 21:22:25 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* Record if pruning would be useful at any level */
|
|
|
|
prunestate->do_initial_prune |= pinfo->do_initial_prune;
|
|
|
|
prunestate->do_exec_prune |= pinfo->do_exec_prune;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* Accumulate the IDs of all PARAM_EXEC Params affecting the
|
|
|
|
* partitioning decisions at this plan node.
|
|
|
|
*/
|
|
|
|
prunestate->execparamids = bms_add_members(prunestate->execparamids,
|
|
|
|
pinfo->execparamids);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
j++;
|
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return prunestate;
|
|
|
|
}
|
|
|
|
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
/*
|
|
|
|
* ExecDestroyPartitionPruneState
|
|
|
|
* Release resources at plan shutdown.
|
|
|
|
*
|
|
|
|
* We don't bother to free any memory here, since the whole executor context
|
|
|
|
* will be going away shortly. We do need to release our relcache pins.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ExecDestroyPartitionPruneState(PartitionPruneState *prunestate)
|
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionPruningData **partprunedata = prunestate->partprunedata;
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < prunestate->num_partprunedata; i++)
|
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionPruningData *prunedata = partprunedata[i];
|
|
|
|
PartitionedRelPruningData *pprune = prunedata->partrelprunedata;
|
|
|
|
int j;
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
for (j = 0; j < prunedata->num_partrelprunedata; j++)
|
|
|
|
relation_close(pprune[j].context.partrel, NoLock);
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
/*
|
|
|
|
* ExecFindInitialMatchingSubPlans
|
2018-06-11 00:24:34 +02:00
|
|
|
* Identify the set of subplans that cannot be eliminated by initial
|
|
|
|
* pruning (disregarding any pruning constraints involving PARAM_EXEC
|
|
|
|
* Params). Also re-map the translation matrix which allows conversion
|
|
|
|
* of partition indexes into subplan indexes to account for the unneeded
|
|
|
|
* subplans having been removed.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-11 00:24:34 +02:00
|
|
|
* Must only be called once per 'prunestate', and only if initial pruning
|
|
|
|
* is required.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-11 00:24:34 +02:00
|
|
|
* 'nsubplans' must be passed as the total number of unpruned subplans.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
Bitmapset *
|
2018-06-11 00:24:34 +02:00
|
|
|
ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
|
|
|
Bitmapset *result = NULL;
|
2018-08-02 01:42:46 +02:00
|
|
|
MemoryContext oldcontext;
|
|
|
|
int i;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-06-10 21:22:25 +02:00
|
|
|
Assert(prunestate->do_initial_prune);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Switch to a temp context to avoid leaking memory in the executor's
|
|
|
|
* memory context.
|
|
|
|
*/
|
|
|
|
oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* For each hierarchy, do the pruning tests, and add deletable subplans'
|
|
|
|
* indexes to "result".
|
|
|
|
*/
|
|
|
|
for (i = 0; i < prunestate->num_partprunedata; i++)
|
|
|
|
{
|
|
|
|
PartitionPruningData *prunedata;
|
|
|
|
PartitionedRelPruningData *pprune;
|
|
|
|
|
|
|
|
prunedata = prunestate->partprunedata[i];
|
|
|
|
pprune = &prunedata->partrelprunedata[0];
|
|
|
|
|
|
|
|
/* Perform pruning without using PARAM_EXEC Params */
|
|
|
|
find_matching_subplans_recurse(prunedata, pprune, true, &result);
|
|
|
|
|
|
|
|
/* Expression eval may have used space in node's ps_ExprContext too */
|
|
|
|
ResetExprContext(pprune->context.planstate->ps_ExprContext);
|
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
|
2018-06-10 21:22:25 +02:00
|
|
|
/* Copy result out of the temp context before we reset it */
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
result = bms_copy(result);
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* Add in any subplans that partition pruning didn't account for */
|
|
|
|
result = bms_add_members(result, prunestate->other_subplans);
|
|
|
|
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
MemoryContextReset(prunestate->prune_context);
|
|
|
|
|
|
|
|
/*
|
2018-06-11 00:24:34 +02:00
|
|
|
* If any subplans were pruned, we must re-sequence the subplan indexes so
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
* that ExecFindMatchingSubPlans properly returns the indexes from the
|
2018-06-11 00:24:34 +02:00
|
|
|
* subplans which will remain after execution of this function.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
2018-06-11 00:24:34 +02:00
|
|
|
if (bms_num_members(result) < nsubplans)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-06-11 00:24:34 +02:00
|
|
|
int *new_subplan_indexes;
|
2018-08-02 01:42:46 +02:00
|
|
|
Bitmapset *new_other_subplans;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
int i;
|
|
|
|
int newidx;
|
|
|
|
|
|
|
|
/*
|
2018-06-11 23:35:43 +02:00
|
|
|
* First we must build a temporary array which maps old subplan
|
2018-08-02 01:42:46 +02:00
|
|
|
* indexes to new ones. While we're at it, also recompute the
|
|
|
|
* other_subplans set, since indexes in it may change.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
2018-06-11 00:24:34 +02:00
|
|
|
new_subplan_indexes = (int *) palloc(sizeof(int) * nsubplans);
|
2018-08-02 01:42:46 +02:00
|
|
|
new_other_subplans = NULL;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
newidx = 0;
|
2018-06-11 00:24:34 +02:00
|
|
|
for (i = 0; i < nsubplans; i++)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
|
|
|
if (bms_is_member(i, result))
|
2018-06-11 00:24:34 +02:00
|
|
|
new_subplan_indexes[i] = newidx++;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
else
|
2018-06-11 00:24:34 +02:00
|
|
|
new_subplan_indexes[i] = -1; /* Newly pruned */
|
2018-08-02 01:42:46 +02:00
|
|
|
|
|
|
|
if (bms_is_member(i, prunestate->other_subplans))
|
|
|
|
new_other_subplans = bms_add_member(new_other_subplans,
|
|
|
|
new_subplan_indexes[i]);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
}
|
2018-08-02 01:42:46 +02:00
|
|
|
bms_free(prunestate->other_subplans);
|
|
|
|
prunestate->other_subplans = new_other_subplans;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
/*
|
2018-08-02 01:42:46 +02:00
|
|
|
* Now we can update each PartitionedRelPruneInfo's subplan_map with
|
|
|
|
* new subplan indexes. We must also recompute its present_parts
|
|
|
|
* bitmap.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
2018-08-02 01:42:46 +02:00
|
|
|
for (i = 0; i < prunestate->num_partprunedata; i++)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionPruningData *prunedata = prunestate->partprunedata[i];
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
int j;
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* Within each hierarchy, we perform this loop in back-to-front
|
|
|
|
* order so that we determine present_parts for the lowest-level
|
|
|
|
* partitioned tables first. This way we can tell whether a
|
|
|
|
* sub-partitioned table's partitions were entirely pruned so we
|
|
|
|
* can exclude that from 'present_parts'.
|
|
|
|
*/
|
|
|
|
for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
|
|
|
|
int nparts = pprune->context.nparts;
|
|
|
|
int k;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* We just rebuild present_parts from scratch */
|
|
|
|
bms_free(pprune->present_parts);
|
|
|
|
pprune->present_parts = NULL;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
for (k = 0; k < nparts; k++)
|
2018-06-10 21:22:25 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
int oldidx = pprune->subplan_map[k];
|
|
|
|
int subidx;
|
2018-06-10 21:22:25 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* If this partition existed as a subplan then change the
|
|
|
|
* old subplan index to the new subplan index. The new
|
|
|
|
* index may become -1 if the partition was pruned above,
|
|
|
|
* or it may just come earlier in the subplan list due to
|
|
|
|
* some subplans being removed earlier in the list. If
|
|
|
|
* it's a subpartition, add it to present_parts unless
|
|
|
|
* it's entirely pruned.
|
|
|
|
*/
|
|
|
|
if (oldidx >= 0)
|
|
|
|
{
|
|
|
|
Assert(oldidx < nsubplans);
|
|
|
|
pprune->subplan_map[k] = new_subplan_indexes[oldidx];
|
2018-06-10 21:22:25 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
if (new_subplan_indexes[oldidx] >= 0)
|
|
|
|
pprune->present_parts =
|
|
|
|
bms_add_member(pprune->present_parts, k);
|
|
|
|
}
|
|
|
|
else if ((subidx = pprune->subpart_map[k]) >= 0)
|
|
|
|
{
|
|
|
|
PartitionedRelPruningData *subprune;
|
|
|
|
|
|
|
|
subprune = &prunedata->partrelprunedata[subidx];
|
|
|
|
|
|
|
|
if (!bms_is_empty(subprune->present_parts))
|
|
|
|
pprune->present_parts =
|
|
|
|
bms_add_member(pprune->present_parts, k);
|
|
|
|
}
|
2018-06-10 21:22:25 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-11 00:24:34 +02:00
|
|
|
pfree(new_subplan_indexes);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExecFindMatchingSubPlans
|
2018-04-09 16:17:12 +02:00
|
|
|
* Determine which subplans match the pruning steps detailed in
|
2018-06-11 00:24:34 +02:00
|
|
|
* 'prunestate' for the current comparison expression values.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-10 21:22:25 +02:00
|
|
|
* Here we assume we may evaluate PARAM_EXEC Params.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
Bitmapset *
|
|
|
|
ExecFindMatchingSubPlans(PartitionPruneState *prunestate)
|
|
|
|
{
|
|
|
|
Bitmapset *result = NULL;
|
2018-08-02 01:42:46 +02:00
|
|
|
MemoryContext oldcontext;
|
|
|
|
int i;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Switch to a temp context to avoid leaking memory in the executor's
|
|
|
|
* memory context.
|
|
|
|
*/
|
|
|
|
oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* For each hierarchy, do the pruning tests, and add deletable subplans'
|
|
|
|
* indexes to "result".
|
|
|
|
*/
|
|
|
|
for (i = 0; i < prunestate->num_partprunedata; i++)
|
|
|
|
{
|
|
|
|
PartitionPruningData *prunedata;
|
|
|
|
PartitionedRelPruningData *pprune;
|
|
|
|
|
|
|
|
prunedata = prunestate->partprunedata[i];
|
|
|
|
pprune = &prunedata->partrelprunedata[0];
|
|
|
|
|
|
|
|
find_matching_subplans_recurse(prunedata, pprune, false, &result);
|
|
|
|
|
|
|
|
/* Expression eval may have used space in node's ps_ExprContext too */
|
|
|
|
ResetExprContext(pprune->context.planstate->ps_ExprContext);
|
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
|
2018-06-10 21:22:25 +02:00
|
|
|
/* Copy result out of the temp context before we reset it */
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
result = bms_copy(result);
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* Add in any subplans that partition pruning didn't account for */
|
|
|
|
result = bms_add_members(result, prunestate->other_subplans);
|
|
|
|
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
MemoryContextReset(prunestate->prune_context);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2018-06-10 21:22:25 +02:00
|
|
|
* find_matching_subplans_recurse
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
* Recursive worker function for ExecFindMatchingSubPlans and
|
|
|
|
* ExecFindInitialMatchingSubPlans
|
2018-06-10 21:22:25 +02:00
|
|
|
*
|
|
|
|
* Adds valid (non-prunable) subplan IDs to *validsubplans
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
static void
|
2018-08-02 01:42:46 +02:00
|
|
|
find_matching_subplans_recurse(PartitionPruningData *prunedata,
|
|
|
|
PartitionedRelPruningData *pprune,
|
2018-06-10 21:22:25 +02:00
|
|
|
bool initial_prune,
|
|
|
|
Bitmapset **validsubplans)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
|
|
|
Bitmapset *partset;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Guard against stack overflow due to overly deep partition hierarchy. */
|
|
|
|
check_stack_depth();
|
|
|
|
|
2018-06-10 21:22:25 +02:00
|
|
|
/* Only prune if pruning would be useful at this level. */
|
|
|
|
if (initial_prune ? pprune->do_initial_prune : pprune->do_exec_prune)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-06-10 21:22:25 +02:00
|
|
|
PartitionPruneContext *context = &pprune->context;
|
|
|
|
|
|
|
|
/* Set whether we can evaluate PARAM_EXEC Params or not */
|
|
|
|
context->evalexecparams = !initial_prune;
|
|
|
|
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
partset = get_matching_partitions(context,
|
|
|
|
pprune->pruning_steps);
|
|
|
|
}
|
|
|
|
else
|
2018-06-10 21:22:25 +02:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If no pruning is to be done, just include all partitions at this
|
|
|
|
* level.
|
|
|
|
*/
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
partset = pprune->present_parts;
|
2018-06-10 21:22:25 +02:00
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-06-11 00:24:34 +02:00
|
|
|
/* Translate partset into subplan indexes */
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
i = -1;
|
|
|
|
while ((i = bms_next_member(partset, i)) >= 0)
|
|
|
|
{
|
2018-06-11 00:24:34 +02:00
|
|
|
if (pprune->subplan_map[i] >= 0)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*validsubplans = bms_add_member(*validsubplans,
|
2018-06-11 00:24:34 +02:00
|
|
|
pprune->subplan_map[i]);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
int partidx = pprune->subpart_map[i];
|
|
|
|
|
2018-06-11 00:24:34 +02:00
|
|
|
if (partidx >= 0)
|
2018-08-02 01:42:46 +02:00
|
|
|
find_matching_subplans_recurse(prunedata,
|
|
|
|
&prunedata->partrelprunedata[partidx],
|
2018-06-10 21:22:25 +02:00
|
|
|
initial_prune, validsubplans);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
else
|
|
|
|
{
|
2018-08-16 17:43:04 +02:00
|
|
|
/*
|
|
|
|
* We get here if the planner already pruned all the sub-
|
|
|
|
* partitions for this partition. Silently ignore this
|
|
|
|
* partition in this case. The end result is the same: we
|
|
|
|
* would have pruned all partitions just the same, but we
|
|
|
|
* don't have any pruning steps to execute to verify this.
|
|
|
|
*/
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|