2017-11-15 16:23:28 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* execPartition.c
|
|
|
|
* Support routines for partitioning.
|
|
|
|
*
|
2019-01-02 18:44:25 +01:00
|
|
|
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
|
2017-11-15 16:23:28 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/backend/executor/execPartition.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2019-01-21 19:18:20 +01:00
|
|
|
#include "access/table.h"
|
2018-04-15 02:12:14 +02:00
|
|
|
#include "catalog/partition.h"
|
2018-04-08 20:35:29 +02:00
|
|
|
#include "catalog/pg_inherits.h"
|
2018-03-26 15:43:54 +02:00
|
|
|
#include "catalog/pg_type.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
#include "executor/execPartition.h"
|
|
|
|
#include "executor/executor.h"
|
2018-04-07 01:16:11 +02:00
|
|
|
#include "foreign/fdwapi.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
#include "mb/pg_wchar.h"
|
|
|
|
#include "miscadmin.h"
|
2018-03-26 15:43:54 +02:00
|
|
|
#include "nodes/makefuncs.h"
|
2018-04-15 02:12:14 +02:00
|
|
|
#include "partitioning/partbounds.h"
|
2019-02-21 17:38:54 +01:00
|
|
|
#include "partitioning/partdesc.h"
|
2018-04-15 02:12:14 +02:00
|
|
|
#include "partitioning/partprune.h"
|
2018-04-16 20:50:57 +02:00
|
|
|
#include "rewrite/rewriteManip.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
#include "utils/lsyscache.h"
|
2018-04-15 02:12:14 +02:00
|
|
|
#include "utils/partcache.h"
|
|
|
|
#include "utils/rel.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
#include "utils/rls.h"
|
|
|
|
#include "utils/ruleutils.h"
|
|
|
|
|
2018-04-15 02:12:14 +02:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/*-----------------------
|
|
|
|
* PartitionTupleRouting - Encapsulates all information required to
|
|
|
|
* route a tuple inserted into a partitioned table to one of its leaf
|
|
|
|
* partitions.
|
|
|
|
*
|
|
|
|
* partition_root
|
|
|
|
* The partitioned table that's the target of the command.
|
|
|
|
*
|
|
|
|
* partition_dispatch_info
|
|
|
|
* Array of 'max_dispatch' elements containing a pointer to a
|
|
|
|
* PartitionDispatch object for every partitioned table touched by tuple
|
|
|
|
* routing. The entry for the target partitioned table is *always*
|
|
|
|
* present in the 0th element of this array. See comment for
|
|
|
|
* PartitionDispatchData->indexes for details on how this array is
|
|
|
|
* indexed.
|
|
|
|
*
|
|
|
|
* num_dispatch
|
|
|
|
* The current number of items stored in the 'partition_dispatch_info'
|
|
|
|
* array. Also serves as the index of the next free array element for
|
|
|
|
* new PartitionDispatch objects that need to be stored.
|
|
|
|
*
|
|
|
|
* max_dispatch
|
|
|
|
* The current allocated size of the 'partition_dispatch_info' array.
|
|
|
|
*
|
|
|
|
* partitions
|
|
|
|
* Array of 'max_partitions' elements containing a pointer to a
|
|
|
|
* ResultRelInfo for every leaf partitions touched by tuple routing.
|
|
|
|
* Some of these are pointers to ResultRelInfos which are borrowed out of
|
|
|
|
* 'subplan_resultrel_htab'. The remainder have been built especially
|
|
|
|
* for tuple routing. See comment for PartitionDispatchData->indexes for
|
|
|
|
* details on how this array is indexed.
|
|
|
|
*
|
|
|
|
* num_partitions
|
|
|
|
* The current number of items stored in the 'partitions' array. Also
|
|
|
|
* serves as the index of the next free array element for new
|
|
|
|
* ResultRelInfo objects that need to be stored.
|
|
|
|
*
|
|
|
|
* max_partitions
|
|
|
|
* The current allocated size of the 'partitions' array.
|
|
|
|
*
|
|
|
|
* subplan_resultrel_htab
|
|
|
|
* Hash table to store subplan ResultRelInfos by Oid. This is used to
|
|
|
|
* cache ResultRelInfos from subplans of an UPDATE ModifyTable node;
|
|
|
|
* NULL in other cases. Some of these may be useful for tuple routing
|
|
|
|
* to save having to build duplicates.
|
|
|
|
*
|
|
|
|
* memcxt
|
|
|
|
* Memory context used to allocate subsidiary structs.
|
|
|
|
*-----------------------
|
|
|
|
*/
|
2018-11-16 20:55:44 +01:00
|
|
|
struct PartitionTupleRouting
|
2018-11-16 18:54:15 +01:00
|
|
|
{
|
|
|
|
Relation partition_root;
|
|
|
|
PartitionDispatch *partition_dispatch_info;
|
|
|
|
int num_dispatch;
|
|
|
|
int max_dispatch;
|
|
|
|
ResultRelInfo **partitions;
|
|
|
|
int num_partitions;
|
|
|
|
int max_partitions;
|
|
|
|
HTAB *subplan_resultrel_htab;
|
|
|
|
MemoryContext memcxt;
|
2018-11-16 20:55:44 +01:00
|
|
|
};
|
2018-11-16 18:54:15 +01:00
|
|
|
|
2018-09-15 00:06:57 +02:00
|
|
|
/*-----------------------
|
|
|
|
* PartitionDispatch - information about one partitioned table in a partition
|
2018-11-16 18:54:15 +01:00
|
|
|
* hierarchy required to route a tuple to any of its partitions. A
|
|
|
|
* PartitionDispatch is always encapsulated inside a PartitionTupleRouting
|
|
|
|
* struct and stored inside its 'partition_dispatch_info' array.
|
2018-09-15 00:06:57 +02:00
|
|
|
*
|
2018-11-16 18:54:15 +01:00
|
|
|
* reldesc
|
|
|
|
* Relation descriptor of the table
|
2018-11-21 19:21:00 +01:00
|
|
|
*
|
2018-11-16 18:54:15 +01:00
|
|
|
* key
|
|
|
|
* Partition key information of the table
|
2018-11-21 19:21:00 +01:00
|
|
|
*
|
2018-11-16 18:54:15 +01:00
|
|
|
* keystate
|
|
|
|
* Execution state required for expressions in the partition key
|
2018-11-21 19:21:00 +01:00
|
|
|
*
|
2018-11-16 18:54:15 +01:00
|
|
|
* partdesc
|
|
|
|
* Partition descriptor of the table
|
2018-11-21 19:21:00 +01:00
|
|
|
*
|
2018-11-16 18:54:15 +01:00
|
|
|
* tupslot
|
|
|
|
* A standalone TupleTableSlot initialized with this table's tuple
|
|
|
|
* descriptor, or NULL if no tuple conversion between the parent is
|
|
|
|
* required.
|
2018-11-21 19:21:00 +01:00
|
|
|
*
|
2018-11-16 18:54:15 +01:00
|
|
|
* tupmap
|
|
|
|
* TupleConversionMap to convert from the parent's rowtype to this table's
|
|
|
|
* rowtype (when extracting the partition key of a tuple just before
|
|
|
|
* routing it through this table). A NULL value is stored if no tuple
|
|
|
|
* conversion is required.
|
2018-11-21 19:21:00 +01:00
|
|
|
*
|
2018-11-16 18:54:15 +01:00
|
|
|
* indexes
|
|
|
|
* Array of partdesc->nparts elements. For leaf partitions the index
|
|
|
|
* corresponds to the partition's ResultRelInfo in the encapsulating
|
|
|
|
* PartitionTupleRouting's partitions array. For partitioned partitions,
|
|
|
|
* the index corresponds to the PartitionDispatch for it in its
|
|
|
|
* partition_dispatch_info array. -1 indicates we've not yet allocated
|
|
|
|
* anything in PartitionTupleRouting for the partition.
|
2018-09-15 00:06:57 +02:00
|
|
|
*-----------------------
|
|
|
|
*/
|
|
|
|
typedef struct PartitionDispatchData
|
|
|
|
{
|
|
|
|
Relation reldesc;
|
|
|
|
PartitionKey key;
|
|
|
|
List *keystate; /* list of ExprState */
|
|
|
|
PartitionDesc partdesc;
|
|
|
|
TupleTableSlot *tupslot;
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
AttrNumber *tupmap;
|
2018-11-16 18:54:15 +01:00
|
|
|
int indexes[FLEXIBLE_ARRAY_MEMBER];
|
2018-09-15 00:06:57 +02:00
|
|
|
} PartitionDispatchData;
|
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/* struct to hold result relations coming from UPDATE subplans */
|
|
|
|
typedef struct SubplanResultRelHashElem
|
|
|
|
{
|
|
|
|
Oid relid; /* hash key -- must be first */
|
|
|
|
ResultRelInfo *rri;
|
|
|
|
} SubplanResultRelHashElem;
|
|
|
|
|
|
|
|
|
|
|
|
static void ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
|
|
|
|
PartitionTupleRouting *proute);
|
|
|
|
static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
|
|
|
|
EState *estate, PartitionTupleRouting *proute,
|
|
|
|
PartitionDispatch dispatch,
|
|
|
|
ResultRelInfo *rootResultRelInfo,
|
|
|
|
int partidx);
|
|
|
|
static void ExecInitRoutingInfo(ModifyTableState *mtstate,
|
|
|
|
EState *estate,
|
|
|
|
PartitionTupleRouting *proute,
|
|
|
|
PartitionDispatch dispatch,
|
|
|
|
ResultRelInfo *partRelInfo,
|
|
|
|
int partidx);
|
|
|
|
static PartitionDispatch ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute,
|
|
|
|
Oid partoid, PartitionDispatch parent_pd, int partidx);
|
2017-11-15 16:23:28 +01:00
|
|
|
static void FormPartitionKeyDatum(PartitionDispatch pd,
|
|
|
|
TupleTableSlot *slot,
|
|
|
|
EState *estate,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull);
|
2018-07-27 15:34:57 +02:00
|
|
|
static int get_partition_for_tuple(PartitionDispatch pd, Datum *values,
|
2018-04-15 02:12:14 +02:00
|
|
|
bool *isnull);
|
2017-11-15 16:23:28 +01:00
|
|
|
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull,
|
|
|
|
int maxfieldlen);
|
2018-03-26 15:43:54 +02:00
|
|
|
static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map);
|
2018-08-02 01:42:46 +02:00
|
|
|
static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
|
|
|
|
PartitionedRelPruningData *pprune,
|
2018-06-10 21:22:25 +02:00
|
|
|
bool initial_prune,
|
|
|
|
Bitmapset **validsubplans);
|
2018-03-26 15:43:54 +02:00
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
2018-01-04 21:48:15 +01:00
|
|
|
* ExecSetupPartitionTupleRouting - sets up information needed during
|
|
|
|
* tuple routing for partitioned tables, encapsulates it in
|
|
|
|
* PartitionTupleRouting, and returns it.
|
2017-11-15 16:23:28 +01:00
|
|
|
*
|
2018-11-16 18:54:15 +01:00
|
|
|
* Callers must use the returned PartitionTupleRouting during calls to
|
|
|
|
* ExecFindPartition(). The actual ResultRelInfo for a partition is only
|
|
|
|
* allocated when the partition is found for the first time.
|
|
|
|
*
|
|
|
|
* The current memory context is used to allocate this struct and all
|
|
|
|
* subsidiary structs that will be allocated from it later on. Typically
|
|
|
|
* it should be estate->es_query_cxt.
|
2017-11-15 16:23:28 +01:00
|
|
|
*/
|
2018-01-04 21:48:15 +01:00
|
|
|
PartitionTupleRouting *
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
|
2017-11-15 16:23:28 +01:00
|
|
|
{
|
2018-01-04 21:48:15 +01:00
|
|
|
PartitionTupleRouting *proute;
|
2018-03-26 15:43:54 +02:00
|
|
|
ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
2018-11-16 18:54:15 +01:00
|
|
|
* Here we attempt to expend as little effort as possible in setting up
|
|
|
|
* the PartitionTupleRouting. Each partition's ResultRelInfo is built on
|
|
|
|
* demand, only when we actually need to route a tuple to that partition.
|
|
|
|
* The reason for this is that a common case is for INSERT to insert a
|
|
|
|
* single tuple into a partitioned table and this must be fast.
|
2017-11-15 16:23:28 +01:00
|
|
|
*/
|
2018-01-04 21:48:15 +01:00
|
|
|
proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
|
2018-11-16 18:54:15 +01:00
|
|
|
proute->partition_root = rel;
|
|
|
|
proute->memcxt = CurrentMemoryContext;
|
|
|
|
/* Rest of members initialized by zeroing */
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/*
|
|
|
|
* Initialize this table's PartitionDispatch object. Here we pass in the
|
|
|
|
* parent as NULL as we don't need to care about any parent of the target
|
|
|
|
* partitioned table.
|
|
|
|
*/
|
|
|
|
ExecInitPartitionDispatchInfo(proute, RelationGetRelid(rel), NULL, 0);
|
2018-01-04 21:48:15 +01:00
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/*
|
2018-11-16 18:54:15 +01:00
|
|
|
* If performing an UPDATE with tuple routing, we can reuse partition
|
|
|
|
* sub-plan result rels. We build a hash table to map the OIDs of
|
|
|
|
* partitions present in mtstate->resultRelInfo to their ResultRelInfos.
|
|
|
|
* Every time a tuple is routed to a partition that we've yet to set the
|
|
|
|
* ResultRelInfo for, before we go to the trouble of making one, we check
|
|
|
|
* for a pre-made one in the hash table.
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
if (node && node->operation == CMD_UPDATE)
|
|
|
|
ExecHashSubPlanResultRelsByOid(mtstate, proute);
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
return proute;
|
2017-11-15 16:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2018-11-16 18:54:15 +01:00
|
|
|
* ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
|
|
|
|
* the tuple contained in *slot should belong to.
|
|
|
|
*
|
|
|
|
* If the partition's ResultRelInfo does not yet exist in 'proute' then we set
|
|
|
|
* one up or reuse one from mtstate's resultRelInfo array. When reusing a
|
|
|
|
* ResultRelInfo from the mtstate we verify that the relation is a valid
|
|
|
|
* target for INSERTs and then set up a PartitionRoutingInfo for it.
|
|
|
|
*
|
|
|
|
* rootResultRelInfo is the relation named in the query.
|
2017-11-15 16:23:28 +01:00
|
|
|
*
|
|
|
|
* estate must be non-NULL; we'll need it to compute any expressions in the
|
2018-11-16 18:54:15 +01:00
|
|
|
* partition keys. Also, its per-tuple contexts are used as evaluation
|
|
|
|
* scratch space.
|
2017-11-15 16:23:28 +01:00
|
|
|
*
|
|
|
|
* If no leaf partition is found, this routine errors out with the appropriate
|
2018-11-22 05:23:54 +01:00
|
|
|
* error message. An error may also be raised if the found target partition
|
|
|
|
* is not a valid target for an INSERT.
|
2017-11-15 16:23:28 +01:00
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
ResultRelInfo *
|
|
|
|
ExecFindPartition(ModifyTableState *mtstate,
|
|
|
|
ResultRelInfo *rootResultRelInfo,
|
|
|
|
PartitionTupleRouting *proute,
|
2017-11-15 16:23:28 +01:00
|
|
|
TupleTableSlot *slot, EState *estate)
|
|
|
|
{
|
2018-11-16 18:54:15 +01:00
|
|
|
PartitionDispatch *pd = proute->partition_dispatch_info;
|
2017-11-15 16:23:28 +01:00
|
|
|
Datum values[PARTITION_MAX_KEYS];
|
|
|
|
bool isnull[PARTITION_MAX_KEYS];
|
|
|
|
Relation rel;
|
2018-08-01 21:06:47 +02:00
|
|
|
PartitionDispatch dispatch;
|
2018-11-16 18:54:15 +01:00
|
|
|
PartitionDesc partdesc;
|
2017-11-15 16:23:28 +01:00
|
|
|
ExprContext *ecxt = GetPerTupleExprContext(estate);
|
|
|
|
TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
|
2018-08-01 21:06:47 +02:00
|
|
|
TupleTableSlot *myslot = NULL;
|
2018-10-04 20:03:37 +02:00
|
|
|
MemoryContext oldcxt;
|
2018-08-01 21:06:47 +02:00
|
|
|
|
|
|
|
/* use per-tuple context here to avoid leaking memory */
|
|
|
|
oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* First check the root table's partition constraint, if any. No point in
|
|
|
|
* routing the tuple if it doesn't belong in the root table itself.
|
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
if (rootResultRelInfo->ri_PartitionCheck)
|
|
|
|
ExecPartitionCheck(rootResultRelInfo, slot, estate, true);
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/* start with the root partitioned table */
|
2018-08-01 21:06:47 +02:00
|
|
|
dispatch = pd[0];
|
2017-11-15 16:23:28 +01:00
|
|
|
while (true)
|
|
|
|
{
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
AttrNumber *map = dispatch->tupmap;
|
2018-11-16 18:54:15 +01:00
|
|
|
int partidx = -1;
|
|
|
|
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
2017-11-15 16:23:28 +01:00
|
|
|
|
2018-08-01 21:06:47 +02:00
|
|
|
rel = dispatch->reldesc;
|
2018-11-16 18:54:15 +01:00
|
|
|
partdesc = dispatch->partdesc;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
2018-08-01 21:06:47 +02:00
|
|
|
* Convert the tuple to this parent's layout, if different from the
|
|
|
|
* current relation.
|
2017-11-15 16:23:28 +01:00
|
|
|
*/
|
2018-08-01 21:06:47 +02:00
|
|
|
myslot = dispatch->tupslot;
|
2018-11-16 18:54:15 +01:00
|
|
|
if (myslot != NULL)
|
|
|
|
{
|
|
|
|
Assert(map != NULL);
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
slot = execute_attr_map_slot(map, slot, myslot);
|
2018-11-16 18:54:15 +01:00
|
|
|
}
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Extract partition key from tuple. Expression evaluation machinery
|
|
|
|
* that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
|
|
|
|
* point to the correct tuple slot. The slot might have changed from
|
|
|
|
* what was used for the parent table if the table of the current
|
|
|
|
* partitioning level has different tuple descriptor from the parent.
|
|
|
|
* So update ecxt_scantuple accordingly.
|
|
|
|
*/
|
|
|
|
ecxt->ecxt_scantuple = slot;
|
2018-08-01 21:06:47 +02:00
|
|
|
FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);
|
2017-12-01 16:01:50 +01:00
|
|
|
|
|
|
|
/*
|
2018-11-16 18:54:15 +01:00
|
|
|
* If this partitioned table has no partitions or no partition for
|
|
|
|
* these values, error out.
|
2017-12-01 16:01:50 +01:00
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
if (partdesc->nparts == 0 ||
|
|
|
|
(partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
|
2017-12-01 16:01:50 +01:00
|
|
|
{
|
2018-11-16 18:54:15 +01:00
|
|
|
char *val_desc;
|
|
|
|
|
|
|
|
val_desc = ExecBuildSlotPartitionKeyDescription(rel,
|
|
|
|
values, isnull, 64);
|
|
|
|
Assert(OidIsValid(RelationGetRelid(rel)));
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CHECK_VIOLATION),
|
|
|
|
errmsg("no partition of relation \"%s\" found for row",
|
|
|
|
RelationGetRelationName(rel)),
|
|
|
|
val_desc ?
|
|
|
|
errdetail("Partition key of the failing row contains %s.",
|
|
|
|
val_desc) : 0));
|
2017-12-01 16:01:50 +01:00
|
|
|
}
|
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
if (partdesc->is_leaf[partidx])
|
2017-11-15 16:23:28 +01:00
|
|
|
{
|
2018-11-16 18:54:15 +01:00
|
|
|
ResultRelInfo *rri;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look to see if we've already got a ResultRelInfo for this
|
|
|
|
* partition.
|
|
|
|
*/
|
|
|
|
if (likely(dispatch->indexes[partidx] >= 0))
|
|
|
|
{
|
|
|
|
/* ResultRelInfo already built */
|
|
|
|
Assert(dispatch->indexes[partidx] < proute->num_partitions);
|
|
|
|
rri = proute->partitions[dispatch->indexes[partidx]];
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
bool found = false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We have not yet set up a ResultRelInfo for this partition,
|
|
|
|
* but if we have a subplan hash table, we might have one
|
|
|
|
* there. If not, we'll have to create one.
|
|
|
|
*/
|
|
|
|
if (proute->subplan_resultrel_htab)
|
|
|
|
{
|
|
|
|
Oid partoid = partdesc->oids[partidx];
|
|
|
|
SubplanResultRelHashElem *elem;
|
|
|
|
|
|
|
|
elem = hash_search(proute->subplan_resultrel_htab,
|
|
|
|
&partoid, HASH_FIND, NULL);
|
|
|
|
if (elem)
|
|
|
|
{
|
|
|
|
found = true;
|
|
|
|
rri = elem->rri;
|
|
|
|
|
|
|
|
/* Verify this ResultRelInfo allows INSERTs */
|
|
|
|
CheckValidResultRel(rri, CMD_INSERT);
|
|
|
|
|
|
|
|
/* Set up the PartitionRoutingInfo for it */
|
|
|
|
ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
|
|
|
|
rri, partidx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We need to create a new one. */
|
|
|
|
if (!found)
|
|
|
|
rri = ExecInitPartitionInfo(mtstate, estate, proute,
|
|
|
|
dispatch,
|
|
|
|
rootResultRelInfo, partidx);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Release the tuple in the lowest parent's dedicated slot. */
|
|
|
|
if (slot == myslot)
|
|
|
|
ExecClearTuple(myslot);
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcxt);
|
|
|
|
ecxt->ecxt_scantuple = ecxt_scantuple_old;
|
|
|
|
return rri;
|
2017-11-15 16:23:28 +01:00
|
|
|
}
|
|
|
|
else
|
2018-08-01 21:06:47 +02:00
|
|
|
{
|
2018-11-16 18:54:15 +01:00
|
|
|
/*
|
|
|
|
* Partition is a sub-partitioned table; get the PartitionDispatch
|
|
|
|
*/
|
|
|
|
if (likely(dispatch->indexes[partidx] >= 0))
|
|
|
|
{
|
|
|
|
/* Already built. */
|
|
|
|
Assert(dispatch->indexes[partidx] < proute->num_dispatch);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Move down to the next partition level and search again
|
|
|
|
* until we find a leaf partition that matches this tuple
|
|
|
|
*/
|
|
|
|
dispatch = pd[dispatch->indexes[partidx]];
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Not yet built. Do that now. */
|
|
|
|
PartitionDispatch subdispatch;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create the new PartitionDispatch. We pass the current one
|
|
|
|
* in as the parent PartitionDispatch
|
|
|
|
*/
|
|
|
|
subdispatch = ExecInitPartitionDispatchInfo(proute,
|
|
|
|
partdesc->oids[partidx],
|
|
|
|
dispatch, partidx);
|
|
|
|
Assert(dispatch->indexes[partidx] >= 0 &&
|
|
|
|
dispatch->indexes[partidx] < proute->num_dispatch);
|
|
|
|
dispatch = subdispatch;
|
|
|
|
}
|
2018-08-01 21:06:47 +02:00
|
|
|
}
|
2017-11-15 16:23:28 +01:00
|
|
|
}
|
2018-11-16 18:54:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExecHashSubPlanResultRelsByOid
|
|
|
|
* Build a hash table to allow fast lookups of subplan ResultRelInfos by
|
|
|
|
* partition Oid. We also populate the subplan ResultRelInfo with an
|
|
|
|
* ri_PartitionRoot.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
|
|
|
|
PartitionTupleRouting *proute)
|
|
|
|
{
|
|
|
|
HASHCTL ctl;
|
|
|
|
HTAB *htab;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
memset(&ctl, 0, sizeof(ctl));
|
|
|
|
ctl.keysize = sizeof(Oid);
|
|
|
|
ctl.entrysize = sizeof(SubplanResultRelHashElem);
|
|
|
|
ctl.hcxt = CurrentMemoryContext;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
htab = hash_create("PartitionTupleRouting table", mtstate->mt_nplans,
|
|
|
|
&ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
|
|
|
|
proute->subplan_resultrel_htab = htab;
|
2018-08-01 21:06:47 +02:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/* Hash all subplans by their Oid */
|
|
|
|
for (i = 0; i < mtstate->mt_nplans; i++)
|
2017-11-15 16:23:28 +01:00
|
|
|
{
|
2018-11-16 18:54:15 +01:00
|
|
|
ResultRelInfo *rri = &mtstate->resultRelInfo[i];
|
|
|
|
bool found;
|
|
|
|
Oid partoid = RelationGetRelid(rri->ri_RelationDesc);
|
|
|
|
SubplanResultRelHashElem *elem;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
elem = (SubplanResultRelHashElem *)
|
|
|
|
hash_search(htab, &partoid, HASH_ENTER, &found);
|
|
|
|
Assert(!found);
|
|
|
|
elem->rri = rri;
|
2018-08-01 21:06:47 +02:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/*
|
|
|
|
* This is required in order to convert the partition's tuple to be
|
|
|
|
* compatible with the root partitioned table's tuple descriptor. When
|
|
|
|
* generating the per-subplan result rels, this was not set.
|
|
|
|
*/
|
|
|
|
rri->ri_PartitionRoot = proute->partition_root;
|
|
|
|
}
|
2017-11-15 16:23:28 +01:00
|
|
|
}
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/*
|
|
|
|
* ExecInitPartitionInfo
|
Delay lock acquisition for partitions until we route a tuple to them.
Instead of locking all partitions to which we might route a tuple at
executor startup, just lock them as we use them. In some cases such a
partition might get locked at executor startup anyway because it
appears in the query's range table for some other reason, but in other
cases this is a bit savings.
This changes the order in which partitions are locked in some cases,
which might conceivably create deadlock hazards that don't exist
today, but per discussion, it seems like such cases should be rare
enough that we can neglect them in favor of improving performance.
David Rowley, reviewed and tested by Tomas Vondra, Sho Kato, John
Naylor, Tom Lane, and me.
Discussion: http://postgr.es/m/CAKJS1f-=FnMqmQP6qitkD+xEddxw22ySLP-0xFk3JAqUX2yfMw@mail.gmail.com
2019-02-21 17:24:40 +01:00
|
|
|
* Lock the partition and initialize ResultRelInfo. Also setup other
|
|
|
|
* information for the partition and store it in the next empty slot in
|
|
|
|
* the proute->partitions array.
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
*
|
|
|
|
* Returns the ResultRelInfo
|
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
static ResultRelInfo *
|
|
|
|
ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
PartitionTupleRouting *proute,
|
2018-11-16 18:54:15 +01:00
|
|
|
PartitionDispatch dispatch,
|
|
|
|
ResultRelInfo *rootResultRelInfo,
|
|
|
|
int partidx)
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
{
|
2018-04-07 01:16:11 +02:00
|
|
|
ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
|
2018-11-16 18:54:15 +01:00
|
|
|
Relation rootrel = rootResultRelInfo->ri_RelationDesc,
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
partrel;
|
2018-04-16 20:50:57 +02:00
|
|
|
Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
ResultRelInfo *leaf_part_rri;
|
2018-11-16 18:54:15 +01:00
|
|
|
MemoryContext oldcxt;
|
2018-04-16 20:50:57 +02:00
|
|
|
AttrNumber *part_attnos = NULL;
|
|
|
|
bool found_whole_row;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
oldcxt = MemoryContextSwitchTo(proute->memcxt);
|
|
|
|
|
Delay lock acquisition for partitions until we route a tuple to them.
Instead of locking all partitions to which we might route a tuple at
executor startup, just lock them as we use them. In some cases such a
partition might get locked at executor startup anyway because it
appears in the query's range table for some other reason, but in other
cases this is a big savings.
This changes the order in which partitions are locked in some cases,
which might conceivably create deadlock hazards that don't exist
today, but per discussion, it seems like such cases should be rare
enough that we can neglect them in favor of improving performance.
David Rowley, reviewed and tested by Tomas Vondra, Sho Kato, John
Naylor, Tom Lane, and me.
Discussion: http://postgr.es/m/CAKJS1f-=FnMqmQP6qitkD+xEddxw22ySLP-0xFk3JAqUX2yfMw@mail.gmail.com
2019-02-21 17:24:40 +01:00
|
|
|
partrel = table_open(dispatch->partdesc->oids[partidx], RowExclusiveLock);
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
2018-03-26 15:43:54 +02:00
|
|
|
leaf_part_rri = makeNode(ResultRelInfo);
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
InitResultRelInfo(leaf_part_rri,
|
|
|
|
partrel,
|
2018-10-07 20:33:17 +02:00
|
|
|
node ? node->rootRelation : 1,
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
rootrel,
|
|
|
|
estate->es_instrument);
|
|
|
|
|
Fix interaction of foreign tuple routing with remote triggers.
Without these fixes, changes to the inserted tuple made by remote
triggers are ignored when building local RETURNING tuples.
In the core code, call ExecInitRoutingInfo at a later point from
within ExecInitPartitionInfo so that the FDW callback gets invoked
after the returning list has been built. But move CheckValidResultRel
out of ExecInitRoutingInfo so that it can happen at an earlier stage.
In postgres_fdw, refactor assorted deparsing functions to work with
the RTE rather than the PlannerInfo, which saves us having to
construct a fake PlannerInfo in cases where we don't have a real one.
Then, we can pass down a constructed RTE that yields the correct
deparse result when no real one exists. Unfortunately, this
necessitates a hack that understands how the core code manages RT
indexes for update tuple routing, which is ugly, but we don't have a
better idea right now.
Original report, analysis, and patch by Etsuro Fujita. Heavily
refactored by me. Then worked over some more by Amit Langote.
Discussion: http://postgr.es/m/5AD4882B.10002@lab.ntt.co.jp
2018-05-01 19:21:46 +02:00
|
|
|
/*
|
|
|
|
* Verify result relation is a valid target for an INSERT. An UPDATE of a
|
|
|
|
* partition-key becomes a DELETE+INSERT operation, so this check is still
|
|
|
|
* required when the operation is CMD_UPDATE.
|
|
|
|
*/
|
|
|
|
CheckValidResultRel(leaf_part_rri, CMD_INSERT);
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/*
|
|
|
|
* Open partition indices. The user may have asked to check for conflicts
|
|
|
|
* within this leaf partition and do "nothing" instead of throwing an
|
|
|
|
* error. Be prepared in that case by initializing the index information
|
|
|
|
* needed by ExecInsert() to perform speculative insertions.
|
|
|
|
*/
|
|
|
|
if (partrel->rd_rel->relhasindex &&
|
|
|
|
leaf_part_rri->ri_IndexRelationDescs == NULL)
|
|
|
|
ExecOpenIndices(leaf_part_rri,
|
2018-03-19 22:09:43 +01:00
|
|
|
(node != NULL &&
|
|
|
|
node->onConflictAction != ONCONFLICT_NONE));
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Build WITH CHECK OPTION constraints for the partition. Note that we
|
|
|
|
* didn't build the withCheckOptionList for partitions within the planner,
|
|
|
|
* but simple translation of varattnos will suffice. This only occurs for
|
|
|
|
* the INSERT case or in the case of UPDATE tuple routing where we didn't
|
|
|
|
* find a result rel to reuse in ExecSetupPartitionTupleRouting().
|
|
|
|
*/
|
|
|
|
if (node && node->withCheckOptionLists != NIL)
|
|
|
|
{
|
|
|
|
List *wcoList;
|
|
|
|
List *wcoExprs = NIL;
|
|
|
|
ListCell *ll;
|
|
|
|
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the case of INSERT on a partitioned table, there is only one
|
|
|
|
* plan. Likewise, there is only one WCO list, not one per partition.
|
|
|
|
* For UPDATE, there are as many WCO lists as there are plans.
|
|
|
|
*/
|
|
|
|
Assert((node->operation == CMD_INSERT &&
|
|
|
|
list_length(node->withCheckOptionLists) == 1 &&
|
|
|
|
list_length(node->plans) == 1) ||
|
|
|
|
(node->operation == CMD_UPDATE &&
|
|
|
|
list_length(node->withCheckOptionLists) ==
|
|
|
|
list_length(node->plans)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the WCO list of the first plan as a reference to calculate
|
|
|
|
* attno's for the WCO list of this partition. In the INSERT case,
|
|
|
|
* that refers to the root partitioned table, whereas in the UPDATE
|
|
|
|
* tuple routing case, that refers to the first partition in the
|
|
|
|
* mtstate->resultRelInfo array. In any case, both that relation and
|
|
|
|
* this partition should have the same columns, so we should be able
|
|
|
|
* to map attributes successfully.
|
|
|
|
*/
|
|
|
|
wcoList = linitial(node->withCheckOptionLists);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert Vars in it to contain this partition's attribute numbers.
|
|
|
|
*/
|
2018-04-19 21:46:53 +02:00
|
|
|
part_attnos =
|
|
|
|
convert_tuples_by_name_map(RelationGetDescr(partrel),
|
|
|
|
RelationGetDescr(firstResultRel),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
wcoList = (List *)
|
|
|
|
map_variable_attnos((Node *) wcoList,
|
|
|
|
firstVarno, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
2018-04-16 20:50:57 +02:00
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
foreach(ll, wcoList)
|
|
|
|
{
|
|
|
|
WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
|
|
|
|
ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
|
2018-03-06 02:49:59 +01:00
|
|
|
&mtstate->ps);
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
|
|
|
wcoExprs = lappend(wcoExprs, wcoExpr);
|
|
|
|
}
|
|
|
|
|
|
|
|
leaf_part_rri->ri_WithCheckOptions = wcoList;
|
|
|
|
leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build the RETURNING projection for the partition. Note that we didn't
|
|
|
|
* build the returningList for partitions within the planner, but simple
|
|
|
|
* translation of varattnos will suffice. This only occurs for the INSERT
|
|
|
|
* case or in the case of UPDATE tuple routing where we didn't find a
|
|
|
|
* result rel to reuse in ExecSetupPartitionTupleRouting().
|
|
|
|
*/
|
|
|
|
if (node && node->returningLists != NIL)
|
|
|
|
{
|
|
|
|
TupleTableSlot *slot;
|
|
|
|
ExprContext *econtext;
|
|
|
|
List *returningList;
|
|
|
|
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
|
|
|
|
|
|
|
|
/* See the comment above for WCO lists. */
|
|
|
|
Assert((node->operation == CMD_INSERT &&
|
|
|
|
list_length(node->returningLists) == 1 &&
|
|
|
|
list_length(node->plans) == 1) ||
|
|
|
|
(node->operation == CMD_UPDATE &&
|
|
|
|
list_length(node->returningLists) ==
|
|
|
|
list_length(node->plans)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the RETURNING list of the first plan as a reference to
|
|
|
|
* calculate attno's for the RETURNING list of this partition. See
|
|
|
|
* the comment above for WCO lists for more details on why this is
|
|
|
|
* okay.
|
|
|
|
*/
|
|
|
|
returningList = linitial(node->returningLists);
|
|
|
|
|
2018-04-19 21:46:53 +02:00
|
|
|
/*
|
|
|
|
* Convert Vars in it to contain this partition's attribute numbers.
|
|
|
|
*/
|
|
|
|
if (part_attnos == NULL)
|
|
|
|
part_attnos =
|
|
|
|
convert_tuples_by_name_map(RelationGetDescr(partrel),
|
|
|
|
RelationGetDescr(firstResultRel),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
returningList = (List *)
|
|
|
|
map_variable_attnos((Node *) returningList,
|
|
|
|
firstVarno, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
2018-04-16 20:50:57 +02:00
|
|
|
|
2018-04-07 01:16:11 +02:00
|
|
|
leaf_part_rri->ri_returningList = returningList;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize the projection itself.
|
|
|
|
*
|
|
|
|
* Use the slot and the expression context that would have been set up
|
|
|
|
* in ExecInitModifyTable() for projection's output.
|
|
|
|
*/
|
|
|
|
Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
|
|
|
|
slot = mtstate->ps.ps_ResultTupleSlot;
|
|
|
|
Assert(mtstate->ps.ps_ExprContext != NULL);
|
|
|
|
econtext = mtstate->ps.ps_ExprContext;
|
|
|
|
leaf_part_rri->ri_projectReturning =
|
|
|
|
ExecBuildProjectionInfo(returningList, econtext, slot,
|
|
|
|
&mtstate->ps, RelationGetDescr(partrel));
|
|
|
|
}
|
|
|
|
|
Fix interaction of foreign tuple routing with remote triggers.
Without these fixes, changes to the inserted tuple made by remote
triggers are ignored when building local RETURNING tuples.
In the core code, call ExecInitRoutingInfo at a later point from
within ExecInitPartitionInfo so that the FDW callback gets invoked
after the returning list has been built. But move CheckValidResultRel
out of ExecInitRoutingInfo so that it can happen at an earlier stage.
In postgres_fdw, refactor assorted deparsing functions to work with
the RTE rather than the PlannerInfo, which saves us having to
construct a fake PlannerInfo in cases where we don't have a real one.
Then, we can pass down a constructed RTE that yields the correct
deparse result when no real one exists. Unfortunately, this
necessitates a hack that understands how the core code manages RT
indexes for update tuple routing, which is ugly, but we don't have a
better idea right now.
Original report, analysis, and patch by Etsuro Fujita. Heavily
refactored by me. Then worked over some more by Amit Langote.
Discussion: http://postgr.es/m/5AD4882B.10002@lab.ntt.co.jp
2018-05-01 19:21:46 +02:00
|
|
|
/* Set up information needed for routing tuples to the partition. */
|
2018-11-16 18:54:15 +01:00
|
|
|
ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
|
|
|
|
leaf_part_rri, partidx);
|
Fix interaction of foreign tuple routing with remote triggers.
Without these fixes, changes to the inserted tuple made by remote
triggers are ignored when building local RETURNING tuples.
In the core code, call ExecInitRoutingInfo at a later point from
within ExecInitPartitionInfo so that the FDW callback gets invoked
after the returning list has been built. But move CheckValidResultRel
out of ExecInitRoutingInfo so that it can happen at an earlier stage.
In postgres_fdw, refactor assorted deparsing functions to work with
the RTE rather than the PlannerInfo, which saves us having to
construct a fake PlannerInfo in cases where we don't have a real one.
Then, we can pass down a constructed RTE that yields the correct
deparse result when no real one exists. Unfortunately, this
necessitates a hack that understands how the core code manages RT
indexes for update tuple routing, which is ugly, but we don't have a
better idea right now.
Original report, analysis, and patch by Etsuro Fujita. Heavily
refactored by me. Then worked over some more by Amit Langote.
Discussion: http://postgr.es/m/5AD4882B.10002@lab.ntt.co.jp
2018-05-01 19:21:46 +02:00
|
|
|
|
2018-03-26 15:43:54 +02:00
|
|
|
/*
|
|
|
|
* If there is an ON CONFLICT clause, initialize state for it.
|
|
|
|
*/
|
|
|
|
if (node && node->onConflictAction != ONCONFLICT_NONE)
|
|
|
|
{
|
|
|
|
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
|
|
|
|
TupleDesc partrelDesc = RelationGetDescr(partrel);
|
|
|
|
ExprContext *econtext = mtstate->ps.ps_ExprContext;
|
|
|
|
ListCell *lc;
|
|
|
|
List *arbiterIndexes = NIL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there is a list of arbiter indexes, map it to a list of indexes
|
|
|
|
* in the partition. We do that by scanning the partition's index
|
|
|
|
* list and searching for ancestry relationships to each index in the
|
|
|
|
* ancestor table.
|
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) > 0)
|
2018-03-26 15:43:54 +02:00
|
|
|
{
|
|
|
|
List *childIdxs;
|
|
|
|
|
|
|
|
childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);
|
|
|
|
|
|
|
|
foreach(lc, childIdxs)
|
|
|
|
{
|
|
|
|
Oid childIdx = lfirst_oid(lc);
|
|
|
|
List *ancestors;
|
|
|
|
ListCell *lc2;
|
|
|
|
|
|
|
|
ancestors = get_partition_ancestors(childIdx);
|
2018-11-16 18:54:15 +01:00
|
|
|
foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
|
2018-03-26 15:43:54 +02:00
|
|
|
{
|
|
|
|
if (list_member_oid(ancestors, lfirst_oid(lc2)))
|
|
|
|
arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
|
|
|
|
}
|
|
|
|
list_free(ancestors);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the resulting lists are of inequal length, something is wrong.
|
|
|
|
* (This shouldn't happen, since arbiter index selection should not
|
|
|
|
* pick up an invalid index.)
|
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
|
2018-03-26 15:43:54 +02:00
|
|
|
list_length(arbiterIndexes))
|
|
|
|
elog(ERROR, "invalid arbiter index list");
|
|
|
|
leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the DO UPDATE case, we have some more state to initialize.
|
|
|
|
*/
|
|
|
|
if (node->onConflictAction == ONCONFLICT_UPDATE)
|
|
|
|
{
|
2018-11-16 18:54:15 +01:00
|
|
|
TupleConversionMap *map;
|
2019-03-07 00:43:33 +01:00
|
|
|
TupleDesc leaf_desc;
|
2018-11-16 18:54:15 +01:00
|
|
|
|
|
|
|
map = leaf_part_rri->ri_PartitionInfo->pi_RootToPartitionMap;
|
2019-03-07 00:43:33 +01:00
|
|
|
leaf_desc = RelationGetDescr(leaf_part_rri->ri_RelationDesc);
|
2018-11-16 18:54:15 +01:00
|
|
|
|
2018-03-26 15:43:54 +02:00
|
|
|
Assert(node->onConflictSet != NIL);
|
2018-11-16 18:54:15 +01:00
|
|
|
Assert(rootResultRelInfo->ri_onConflict != NULL);
|
2018-03-26 15:43:54 +02:00
|
|
|
|
2019-03-07 00:43:33 +01:00
|
|
|
leaf_part_rri->ri_onConflict = makeNode(OnConflictSetState);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Need a separate existing slot for each partition, as the
|
|
|
|
* partition could be of a different AM, even if the tuple
|
|
|
|
* descriptors match.
|
|
|
|
*/
|
|
|
|
leaf_part_rri->ri_onConflict->oc_Existing =
|
|
|
|
ExecInitExtraTupleSlot(mtstate->ps.state,
|
|
|
|
leaf_desc,
|
|
|
|
&TTSOpsBufferHeapTuple);
|
|
|
|
|
2018-03-26 15:43:54 +02:00
|
|
|
/*
|
|
|
|
* If the partition's tuple descriptor matches exactly the root
|
2019-03-07 00:43:33 +01:00
|
|
|
* parent (the common case), we can re-use most of the parent's ON
|
2018-03-26 15:43:54 +02:00
|
|
|
* CONFLICT SET state, skipping a bunch of work. Otherwise, we
|
|
|
|
* need to create state specific to this partition.
|
|
|
|
*/
|
|
|
|
if (map == NULL)
|
2019-03-07 00:43:33 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* It's safe to reuse these from the partition root, as we
|
|
|
|
* only process one tuple at a time (therefore we won't
|
|
|
|
* overwrite needed data in slots), and the results of
|
|
|
|
* projections are independent of the underlying
|
|
|
|
* storage. Projections and where clauses themselves don't
|
|
|
|
* store state / are independent of the underlying storage.
|
|
|
|
*/
|
|
|
|
leaf_part_rri->ri_onConflict->oc_ProjSlot =
|
|
|
|
rootResultRelInfo->ri_onConflict->oc_ProjSlot;
|
|
|
|
leaf_part_rri->ri_onConflict->oc_ProjInfo =
|
|
|
|
rootResultRelInfo->ri_onConflict->oc_ProjInfo;
|
|
|
|
leaf_part_rri->ri_onConflict->oc_WhereClause =
|
|
|
|
rootResultRelInfo->ri_onConflict->oc_WhereClause;
|
|
|
|
}
|
2018-03-26 15:43:54 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
List *onconflset;
|
|
|
|
TupleDesc tupDesc;
|
|
|
|
bool found_whole_row;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Translate expressions in onConflictSet to account for
|
|
|
|
* different attribute numbers. For that, map partition
|
|
|
|
* varattnos twice: first to catch the EXCLUDED
|
|
|
|
* pseudo-relation (INNER_VAR), and second to handle the main
|
|
|
|
* target relation (firstVarno).
|
|
|
|
*/
|
|
|
|
onconflset = (List *) copyObject((Node *) node->onConflictSet);
|
2018-04-19 21:46:53 +02:00
|
|
|
if (part_attnos == NULL)
|
|
|
|
part_attnos =
|
|
|
|
convert_tuples_by_name_map(RelationGetDescr(partrel),
|
|
|
|
RelationGetDescr(firstResultRel),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
onconflset = (List *)
|
|
|
|
map_variable_attnos((Node *) onconflset,
|
|
|
|
INNER_VAR, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
|
|
|
onconflset = (List *)
|
|
|
|
map_variable_attnos((Node *) onconflset,
|
|
|
|
firstVarno, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
|
|
|
|
|
|
|
/* Finally, adjust this tlist to match the partition. */
|
|
|
|
onconflset = adjust_partition_tlist(onconflset, map);
|
2018-03-26 15:43:54 +02:00
|
|
|
|
2019-03-07 00:43:33 +01:00
|
|
|
/* create the tuple slot for the UPDATE SET projection */
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring a pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot of code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get this merged
now. It's painful to maintain externally, too complicated to commit
after the code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
tupDesc = ExecTypeFromTL(onconflset);
|
2019-03-07 00:43:33 +01:00
|
|
|
leaf_part_rri->ri_onConflict->oc_ProjSlot =
|
|
|
|
ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc,
|
|
|
|
&TTSOpsVirtual);
|
|
|
|
|
|
|
|
/* build UPDATE SET projection state */
|
2018-03-26 15:43:54 +02:00
|
|
|
leaf_part_rri->ri_onConflict->oc_ProjInfo =
|
|
|
|
ExecBuildProjectionInfo(onconflset, econtext,
|
2019-03-07 00:43:33 +01:00
|
|
|
leaf_part_rri->ri_onConflict->oc_ProjSlot,
|
2018-03-26 15:43:54 +02:00
|
|
|
&mtstate->ps, partrelDesc);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there is a WHERE clause, initialize state where it will
|
|
|
|
* be evaluated, mapping the attribute numbers appropriately.
|
|
|
|
* As with onConflictSet, we need to map partition varattnos
|
|
|
|
* to the partition's tupdesc.
|
|
|
|
*/
|
|
|
|
if (node->onConflictWhere)
|
|
|
|
{
|
|
|
|
List *clause;
|
|
|
|
|
|
|
|
clause = copyObject((List *) node->onConflictWhere);
|
2018-04-19 21:46:53 +02:00
|
|
|
clause = (List *)
|
|
|
|
map_variable_attnos((Node *) clause,
|
|
|
|
INNER_VAR, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
|
|
|
clause = (List *)
|
|
|
|
map_variable_attnos((Node *) clause,
|
|
|
|
firstVarno, 0,
|
|
|
|
part_attnos,
|
|
|
|
RelationGetDescr(firstResultRel)->natts,
|
|
|
|
RelationGetForm(partrel)->reltype,
|
|
|
|
&found_whole_row);
|
|
|
|
/* We ignore the value of found_whole_row. */
|
2018-03-26 15:43:54 +02:00
|
|
|
leaf_part_rri->ri_onConflict->oc_WhereClause =
|
|
|
|
ExecInitQual((List *) clause, &mtstate->ps);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/*
|
|
|
|
* Since we've just initialized this ResultRelInfo, it's not in any list
|
|
|
|
* attached to the estate as yet. Add it, so that it can be found later.
|
|
|
|
*
|
|
|
|
* Note that the entries in this list appear in no predetermined order,
|
|
|
|
* because partition result rels are initialized as and when they're
|
|
|
|
* needed.
|
|
|
|
*/
|
|
|
|
MemoryContextSwitchTo(estate->es_query_cxt);
|
|
|
|
estate->es_tuple_routing_result_relations =
|
|
|
|
lappend(estate->es_tuple_routing_result_relations,
|
|
|
|
leaf_part_rri);
|
2018-03-26 15:43:54 +02:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
MemoryContextSwitchTo(oldcxt);
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
|
|
|
return leaf_part_rri;
|
|
|
|
}
|
|
|
|
|
2018-04-07 01:16:11 +02:00
|
|
|
/*
|
|
|
|
* ExecInitRoutingInfo
|
2018-11-16 18:54:15 +01:00
|
|
|
* Set up information needed for translating tuples between root
|
|
|
|
* partitioned table format and partition format, and keep track of it
|
|
|
|
* in PartitionTupleRouting.
|
2018-04-07 01:16:11 +02:00
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
static void
|
2018-04-07 01:16:11 +02:00
|
|
|
ExecInitRoutingInfo(ModifyTableState *mtstate,
|
|
|
|
EState *estate,
|
|
|
|
PartitionTupleRouting *proute,
|
2018-11-16 18:54:15 +01:00
|
|
|
PartitionDispatch dispatch,
|
2018-04-07 01:16:11 +02:00
|
|
|
ResultRelInfo *partRelInfo,
|
|
|
|
int partidx)
|
|
|
|
{
|
2018-11-16 18:54:15 +01:00
|
|
|
MemoryContext oldcxt;
|
|
|
|
PartitionRoutingInfo *partrouteinfo;
|
|
|
|
int rri_index;
|
2018-04-07 01:16:11 +02:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
oldcxt = MemoryContextSwitchTo(proute->memcxt);
|
|
|
|
|
|
|
|
partrouteinfo = palloc(sizeof(PartitionRoutingInfo));
|
2018-04-07 01:16:11 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Set up a tuple conversion map to convert a tuple routed to the
|
|
|
|
* partition from the parent's type to the partition's.
|
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
partrouteinfo->pi_RootToPartitionMap =
|
2018-04-07 01:16:11 +02:00
|
|
|
convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_PartitionRoot),
|
|
|
|
RelationGetDescr(partRelInfo->ri_RelationDesc),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
/*
|
|
|
|
* If a partition has a different rowtype than the root parent, initialize
|
|
|
|
* a slot dedicated to storing this partition's tuples. The slot is used
|
|
|
|
* for various operations that are applied to tuples after routing, such
|
|
|
|
* as checking constraints.
|
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
if (partrouteinfo->pi_RootToPartitionMap != NULL)
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
{
|
|
|
|
Relation partrel = partRelInfo->ri_RelationDesc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize the slot itself setting its descriptor to this
|
|
|
|
* partition's TupleDesc; TupleDesc reference will be released at the
|
|
|
|
* end of the command.
|
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
partrouteinfo->pi_PartitionTupleSlot =
|
|
|
|
ExecInitExtraTupleSlot(estate, RelationGetDescr(partrel),
|
Introduce notion of different types of slots (without implementing them).
Upcoming work intends to allow pluggable ways to introduce new ways of
storing table data. Accessing those table access methods from the
executor requires TupleTableSlots to be carry tuples in the native
format of such storage methods; otherwise there'll be a significant
conversion overhead.
Different access methods will require different data to store tuples
efficiently (just like virtual, minimal, heap already require fields
in TupleTableSlot). To allow that without requiring additional pointer
indirections, we want to have different structs (embedding
TupleTableSlot) for different types of slots. Thus different types of
slots are needed, which requires adapting creators of slots.
The slot that most efficiently can represent a type of tuple in an
executor node will often depend on the type of slot a child node
uses. Therefore we need to track the type of slot is returned by
nodes, so parent slots can create slots based on that.
Relatedly, JIT compilation of tuple deforming needs to know which type
of slot a certain expression refers to, so it can create an
appropriate deforming function for the type of tuple in the slot.
But not all nodes will only return one type of slot, e.g. an append
node will potentially return different types of slots for each of its
subplans.
Therefore add function that allows to query the type of a node's
result slot, and whether it'll always be the same type (whether it's
fixed). This can be queried using ExecGetResultSlotOps().
The scan, result, inner, outer type of slots are automatically
inferred from ExecInitScanTupleSlot(), ExecInitResultSlot(),
left/right subtrees respectively. If that's not correct for a node,
that can be overwritten using new fields in PlanState.
This commit does not introduce the actually abstracted implementation
of different kind of TupleTableSlots, that will be left for a followup
commit. The different types of slots introduced will, for now, still
use the same backing implementation.
While this already partially invalidates the big comment in
tuptable.h, it seems to make more sense to update it later, when the
different TupleTableSlot implementations actually exist.
Author: Ashutosh Bapat and Andres Freund, with changes by Amit Khandekar
Discussion: https://postgr.es/m/20181105210039.hh4vvi4vwoq5ba2q@alap3.anarazel.de
2018-11-16 07:00:30 +01:00
|
|
|
&TTSOpsHeapTuple);
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
}
|
2018-11-16 18:54:15 +01:00
|
|
|
else
|
|
|
|
partrouteinfo->pi_PartitionTupleSlot = NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Also, if transition capture is required, store a map to convert tuples
|
|
|
|
* from partition's rowtype to the root partition table's.
|
|
|
|
*/
|
|
|
|
if (mtstate &&
|
|
|
|
(mtstate->mt_transition_capture || mtstate->mt_oc_transition_capture))
|
|
|
|
{
|
|
|
|
partrouteinfo->pi_PartitionToRootMap =
|
|
|
|
convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_RelationDesc),
|
|
|
|
RelationGetDescr(partRelInfo->ri_PartitionRoot),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
partrouteinfo->pi_PartitionToRootMap = NULL;
|
Use slots more widely in tuple mapping code and make naming more consistent.
It's inefficient to use a single slot for mapping between tuple
descriptors for multiple tuples, as previously done when using
ConvertPartitionTupleSlot(), as that means the slot's tuple descriptors
change for every tuple.
Previously we also, via ConvertPartitionTupleSlot(), built new tuples
after the mapping even in cases where we, immediately afterwards,
access individual columns again.
Refactor the code so one slot, on demand, is used for each
partition. That avoids having to change the descriptor (and allows to
use the more efficient "fixed" tuple slots). Then use slot->slot
mapping, to avoid unnecessarily forming a tuple.
As the naming between the tuple and slot mapping functions wasn't
consistent, rename them to execute_attr_map_{tuple,slot}. It's likely
that we'll also rename convert_tuples_by_* to denote that these
functions "only" build a map, but that's left for later.
Author: Amit Khandekar and Amit Langote, editorialized by me
Reviewed-By: Amit Langote, Amit Khandekar, Andres Freund
Discussion:
https://postgr.es/m/CAJ3gD9fR0wRNeAE8VqffNTyONS_UfFPRpqxhnD9Q42vZB+Jvpg@mail.gmail.com
https://postgr.es/m/e4f9d743-cd4b-efb0-7574-da21d86a7f36%40lab.ntt.co.jp
Backpatch: -
2018-10-02 20:14:26 +02:00
|
|
|
|
2018-04-07 01:16:11 +02:00
|
|
|
/*
|
|
|
|
* If the partition is a foreign table, let the FDW init itself for
|
|
|
|
* routing tuples to the partition.
|
|
|
|
*/
|
|
|
|
if (partRelInfo->ri_FdwRoutine != NULL &&
|
|
|
|
partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
|
|
|
|
partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
|
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
partRelInfo->ri_PartitionInfo = partrouteinfo;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Keep track of it in the PartitionTupleRouting->partitions array.
|
|
|
|
*/
|
|
|
|
Assert(dispatch->indexes[partidx] == -1);
|
|
|
|
|
|
|
|
rri_index = proute->num_partitions++;
|
2018-04-07 01:16:11 +02:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/* Allocate or enlarge the array, as needed */
|
|
|
|
if (proute->num_partitions >= proute->max_partitions)
|
|
|
|
{
|
|
|
|
if (proute->max_partitions == 0)
|
|
|
|
{
|
|
|
|
proute->max_partitions = 8;
|
|
|
|
proute->partitions = (ResultRelInfo **)
|
|
|
|
palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
proute->max_partitions *= 2;
|
|
|
|
proute->partitions = (ResultRelInfo **)
|
|
|
|
repalloc(proute->partitions, sizeof(ResultRelInfo *) *
|
|
|
|
proute->max_partitions);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
proute->partitions[rri_index] = partRelInfo;
|
|
|
|
dispatch->indexes[partidx] = rri_index;
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcxt);
|
2018-04-07 01:16:11 +02:00
|
|
|
}
|
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/*
|
2018-11-16 18:54:15 +01:00
|
|
|
* ExecInitPartitionDispatchInfo
|
Delay lock acquisition for partitions until we route a tuple to them.
Instead of locking all partitions to which we might route a tuple at
executor startup, just lock them as we use them. In some cases such a
partition might get locked at executor startup anyway because it
appears in the query's range table for some other reason, but in other
cases this is a bit savings.
This changes the order in which partitions are locked in some cases,
which might conceivably create deadlock hazards that don't exist
today, but per discussion, it seems like such cases should be rare
enough that we can neglect them in favor of improving performance.
David Rowley, reviewed and tested by Tomas Vondra, Sho Kato, John
Naylor, Tom Lane, and me.
Discussion: http://postgr.es/m/CAKJS1f-=FnMqmQP6qitkD+xEddxw22ySLP-0xFk3JAqUX2yfMw@mail.gmail.com
2019-02-21 17:24:40 +01:00
|
|
|
* Lock the partitioned table (if not locked already) and initialize
|
|
|
|
* PartitionDispatch for a partitioned table and store it in the next
|
|
|
|
* available slot in the proute->partition_dispatch_info array. Also,
|
|
|
|
* record the index into this array in the parent_pd->indexes[] array in
|
|
|
|
* the partidx element so that we can properly retrieve the newly created
|
|
|
|
* PartitionDispatch later.
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
static PartitionDispatch
|
|
|
|
ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute, Oid partoid,
|
|
|
|
PartitionDispatch parent_pd, int partidx)
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
{
|
2018-11-16 18:54:15 +01:00
|
|
|
Relation rel;
|
|
|
|
PartitionDesc partdesc;
|
|
|
|
PartitionDispatch pd;
|
|
|
|
int dispatchidx;
|
|
|
|
MemoryContext oldcxt;
|
|
|
|
|
|
|
|
oldcxt = MemoryContextSwitchTo(proute->memcxt);
|
|
|
|
|
Delay lock acquisition for partitions until we route a tuple to them.
Instead of locking all partitions to which we might route a tuple at
executor startup, just lock them as we use them. In some cases such a
partition might get locked at executor startup anyway because it
appears in the query's range table for some other reason, but in other
cases this is a bit savings.
This changes the order in which partitions are locked in some cases,
which might conceivably create deadlock hazards that don't exist
today, but per discussion, it seems like such cases should be rare
enough that we can neglect them in favor of improving performance.
David Rowley, reviewed and tested by Tomas Vondra, Sho Kato, John
Naylor, Tom Lane, and me.
Discussion: http://postgr.es/m/CAKJS1f-=FnMqmQP6qitkD+xEddxw22ySLP-0xFk3JAqUX2yfMw@mail.gmail.com
2019-02-21 17:24:40 +01:00
|
|
|
/*
|
|
|
|
* Only sub-partitioned tables need to be locked here. The root
|
|
|
|
* partitioned table will already have been locked as it's referenced in
|
|
|
|
* the query's rtable.
|
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
if (partoid != RelationGetRelid(proute->partition_root))
|
Delay lock acquisition for partitions until we route a tuple to them.
Instead of locking all partitions to which we might route a tuple at
executor startup, just lock them as we use them. In some cases such a
partition might get locked at executor startup anyway because it
appears in the query's range table for some other reason, but in other
cases this is a bit savings.
This changes the order in which partitions are locked in some cases,
which might conceivably create deadlock hazards that don't exist
today, but per discussion, it seems like such cases should be rare
enough that we can neglect them in favor of improving performance.
David Rowley, reviewed and tested by Tomas Vondra, Sho Kato, John
Naylor, Tom Lane, and me.
Discussion: http://postgr.es/m/CAKJS1f-=FnMqmQP6qitkD+xEddxw22ySLP-0xFk3JAqUX2yfMw@mail.gmail.com
2019-02-21 17:24:40 +01:00
|
|
|
rel = table_open(partoid, RowExclusiveLock);
|
2018-11-16 18:54:15 +01:00
|
|
|
else
|
|
|
|
rel = proute->partition_root;
|
|
|
|
partdesc = RelationGetPartitionDesc(rel);
|
|
|
|
|
|
|
|
pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
|
|
|
|
partdesc->nparts * sizeof(int));
|
|
|
|
pd->reldesc = rel;
|
|
|
|
pd->key = RelationGetPartitionKey(rel);
|
|
|
|
pd->keystate = NIL;
|
|
|
|
pd->partdesc = partdesc;
|
|
|
|
if (parent_pd != NULL)
|
|
|
|
{
|
|
|
|
TupleDesc tupdesc = RelationGetDescr(rel);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For sub-partitioned tables where the column order differs from its
|
|
|
|
* direct parent partitioned table, we must store a tuple table slot
|
|
|
|
* initialized with its tuple descriptor and a tuple conversion map to
|
|
|
|
* convert a tuple from its parent's rowtype to its own. This is to
|
|
|
|
* make sure that we are looking at the correct row using the correct
|
|
|
|
* tuple descriptor when computing its partition key for tuple
|
|
|
|
* routing.
|
|
|
|
*/
|
|
|
|
pd->tupmap = convert_tuples_by_name_map_if_req(RelationGetDescr(parent_pd->reldesc),
|
|
|
|
tupdesc,
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
pd->tupslot = pd->tupmap ?
|
2019-03-02 02:24:57 +01:00
|
|
|
MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
|
2018-11-16 18:54:15 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Not required for the root partitioned table */
|
|
|
|
pd->tupmap = NULL;
|
|
|
|
pd->tupslot = NULL;
|
|
|
|
}
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
|
|
|
/*
|
2018-11-16 18:54:15 +01:00
|
|
|
* Initialize with -1 to signify that the corresponding partition's
|
|
|
|
* ResultRelInfo or PartitionDispatch has not been created yet.
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
*/
|
2018-11-16 18:54:15 +01:00
|
|
|
memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/* Track in PartitionTupleRouting for later use */
|
|
|
|
dispatchidx = proute->num_dispatch++;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/* Allocate or enlarge the array, as needed */
|
|
|
|
if (proute->num_dispatch >= proute->max_dispatch)
|
|
|
|
{
|
|
|
|
if (proute->max_dispatch == 0)
|
|
|
|
{
|
|
|
|
proute->max_dispatch = 4;
|
|
|
|
proute->partition_dispatch_info = (PartitionDispatch *)
|
|
|
|
palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
proute->max_dispatch *= 2;
|
|
|
|
proute->partition_dispatch_info = (PartitionDispatch *)
|
|
|
|
repalloc(proute->partition_dispatch_info,
|
|
|
|
sizeof(PartitionDispatch) * proute->max_dispatch);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
proute->partition_dispatch_info[dispatchidx] = pd;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/*
|
|
|
|
* Finally, if setting up a PartitionDispatch for a sub-partitioned table,
|
|
|
|
* install a downlink in the parent to allow quick descent.
|
|
|
|
*/
|
|
|
|
if (parent_pd)
|
|
|
|
{
|
|
|
|
Assert(parent_pd->indexes[partidx] == -1);
|
|
|
|
parent_pd->indexes[partidx] = dispatchidx;
|
|
|
|
}
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
MemoryContextSwitchTo(oldcxt);
|
|
|
|
|
|
|
|
return pd;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
}
|
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
/*
|
|
|
|
* ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
|
|
|
|
* routing.
|
|
|
|
*
|
|
|
|
* Close all the partitioned tables, leaf partitions, and their indices.
|
|
|
|
*/
|
|
|
|
void
|
2018-04-07 01:16:11 +02:00
|
|
|
ExecCleanupTupleRouting(ModifyTableState *mtstate,
|
|
|
|
PartitionTupleRouting *proute)
|
2018-01-04 21:48:15 +01:00
|
|
|
{
|
2018-11-16 18:54:15 +01:00
|
|
|
HTAB *htab = proute->subplan_resultrel_htab;
|
2018-01-04 21:48:15 +01:00
|
|
|
int i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remember, proute->partition_dispatch_info[0] corresponds to the root
|
|
|
|
* partitioned table, which we must not try to close, because it is the
|
|
|
|
* main target table of the query that will be closed by callers such as
|
|
|
|
* ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
|
|
|
|
* partitioned table.
|
|
|
|
*/
|
|
|
|
for (i = 1; i < proute->num_dispatch; i++)
|
|
|
|
{
|
|
|
|
PartitionDispatch pd = proute->partition_dispatch_info[i];
|
|
|
|
|
2019-01-21 19:32:19 +01:00
|
|
|
table_close(pd->reldesc, NoLock);
|
2018-11-16 18:54:15 +01:00
|
|
|
|
|
|
|
if (pd->tupslot)
|
|
|
|
ExecDropSingleTupleTableSlot(pd->tupslot);
|
2018-01-04 21:48:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < proute->num_partitions; i++)
|
|
|
|
{
|
|
|
|
ResultRelInfo *resultRelInfo = proute->partitions[i];
|
|
|
|
|
2018-11-16 18:54:15 +01:00
|
|
|
/*
|
|
|
|
* Check if this result rel is one belonging to the node's subplans,
|
|
|
|
* if so, let ExecEndPlan() clean it up.
|
|
|
|
*/
|
|
|
|
if (htab)
|
|
|
|
{
|
|
|
|
Oid partoid;
|
|
|
|
bool found;
|
|
|
|
|
|
|
|
partoid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
|
|
|
|
|
|
|
|
(void) hash_search(htab, &partoid, HASH_FIND, &found);
|
|
|
|
if (found)
|
|
|
|
continue;
|
|
|
|
}
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
2018-04-07 01:16:11 +02:00
|
|
|
/* Allow any FDWs to shut down if they've been exercised */
|
2018-11-16 18:54:15 +01:00
|
|
|
if (resultRelInfo->ri_FdwRoutine != NULL &&
|
2018-04-07 01:16:11 +02:00
|
|
|
resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
|
|
|
|
resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
|
|
|
|
resultRelInfo);
|
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
ExecCloseIndices(resultRelInfo);
|
2019-01-21 19:32:19 +01:00
|
|
|
table_close(resultRelInfo->ri_RelationDesc, NoLock);
|
2018-01-04 21:48:15 +01:00
|
|
|
}
|
2017-11-15 16:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------------
|
|
|
|
* FormPartitionKeyDatum
|
|
|
|
* Construct values[] and isnull[] arrays for the partition key
|
|
|
|
* of a tuple.
|
|
|
|
*
|
|
|
|
* pd Partition dispatch object of the partitioned table
|
|
|
|
* slot Heap tuple from which to extract partition key
|
|
|
|
* estate executor state for evaluating any partition key
|
|
|
|
* expressions (must be non-NULL)
|
|
|
|
* values Array of partition key Datums (output area)
|
|
|
|
* isnull Array of is-null indicators (output area)
|
|
|
|
*
|
|
|
|
* the ecxt_scantuple slot of estate's per-tuple expr context must point to
|
|
|
|
* the heap tuple passed in.
|
|
|
|
* ----------------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
FormPartitionKeyDatum(PartitionDispatch pd,
|
|
|
|
TupleTableSlot *slot,
|
|
|
|
EState *estate,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull)
|
|
|
|
{
|
|
|
|
ListCell *partexpr_item;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (pd->key->partexprs != NIL && pd->keystate == NIL)
|
|
|
|
{
|
|
|
|
/* Check caller has set up context correctly */
|
|
|
|
Assert(estate != NULL &&
|
|
|
|
GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
|
|
|
|
|
|
|
|
/* First time through, set up expression evaluation state */
|
|
|
|
pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
|
|
|
|
}
|
|
|
|
|
|
|
|
partexpr_item = list_head(pd->keystate);
|
|
|
|
for (i = 0; i < pd->key->partnatts; i++)
|
|
|
|
{
|
|
|
|
AttrNumber keycol = pd->key->partattrs[i];
|
|
|
|
Datum datum;
|
|
|
|
bool isNull;
|
|
|
|
|
|
|
|
if (keycol != 0)
|
|
|
|
{
|
|
|
|
/* Plain column; get the value directly from the heap tuple */
|
|
|
|
datum = slot_getattr(slot, keycol, &isNull);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Expression; need to evaluate it */
|
|
|
|
if (partexpr_item == NULL)
|
|
|
|
elog(ERROR, "wrong number of partition key expressions");
|
|
|
|
datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
|
|
|
|
GetPerTupleExprContext(estate),
|
|
|
|
&isNull);
|
|
|
|
partexpr_item = lnext(partexpr_item);
|
|
|
|
}
|
|
|
|
values[i] = datum;
|
|
|
|
isnull[i] = isNull;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (partexpr_item != NULL)
|
|
|
|
elog(ERROR, "wrong number of partition key expressions");
|
|
|
|
}
|
|
|
|
|
2018-04-15 02:12:14 +02:00
|
|
|
/*
|
|
|
|
* get_partition_for_tuple
|
|
|
|
* Finds partition of relation which accepts the partition key specified
|
|
|
|
* in values and isnull
|
|
|
|
*
|
|
|
|
* Return value is index of the partition (>= 0 and < partdesc->nparts) if one
|
|
|
|
* found or -1 if none found.
|
|
|
|
*/
|
2018-04-21 17:21:08 +02:00
|
|
|
static int
|
2018-07-27 15:34:57 +02:00
|
|
|
get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
|
2018-04-15 02:12:14 +02:00
|
|
|
{
|
|
|
|
int bound_offset;
|
|
|
|
int part_index = -1;
|
2018-07-27 15:34:57 +02:00
|
|
|
PartitionKey key = pd->key;
|
|
|
|
PartitionDesc partdesc = pd->partdesc;
|
2018-06-13 19:18:02 +02:00
|
|
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
2018-04-15 02:12:14 +02:00
|
|
|
|
|
|
|
/* Route as appropriate based on partitioning strategy. */
|
|
|
|
switch (key->strategy)
|
|
|
|
{
|
|
|
|
case PARTITION_STRATEGY_HASH:
|
|
|
|
{
|
2018-06-13 19:18:02 +02:00
|
|
|
int greatest_modulus;
|
|
|
|
uint64 rowHash;
|
|
|
|
|
|
|
|
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
|
|
|
|
rowHash = compute_partition_hash_value(key->partnatts,
|
|
|
|
key->partsupfunc,
|
|
|
|
values, isnull);
|
2018-04-15 02:12:14 +02:00
|
|
|
|
|
|
|
part_index = boundinfo->indexes[rowHash % greatest_modulus];
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PARTITION_STRATEGY_LIST:
|
|
|
|
if (isnull[0])
|
|
|
|
{
|
2018-06-13 19:18:02 +02:00
|
|
|
if (partition_bound_accepts_nulls(boundinfo))
|
|
|
|
part_index = boundinfo->null_index;
|
2018-04-15 02:12:14 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
bool equal = false;
|
|
|
|
|
|
|
|
bound_offset = partition_list_bsearch(key->partsupfunc,
|
|
|
|
key->partcollation,
|
2018-06-13 19:18:02 +02:00
|
|
|
boundinfo,
|
2018-04-15 02:12:14 +02:00
|
|
|
values[0], &equal);
|
|
|
|
if (bound_offset >= 0 && equal)
|
2018-06-13 19:18:02 +02:00
|
|
|
part_index = boundinfo->indexes[bound_offset];
|
2018-04-15 02:12:14 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
|
|
{
|
|
|
|
bool equal = false,
|
|
|
|
range_partkey_has_null = false;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* No range includes NULL, so this will be accepted by the
|
|
|
|
* default partition if there is one, and otherwise rejected.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < key->partnatts; i++)
|
|
|
|
{
|
|
|
|
if (isnull[i])
|
|
|
|
{
|
|
|
|
range_partkey_has_null = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!range_partkey_has_null)
|
|
|
|
{
|
|
|
|
bound_offset = partition_range_datum_bsearch(key->partsupfunc,
|
|
|
|
key->partcollation,
|
2018-06-13 19:18:02 +02:00
|
|
|
boundinfo,
|
2018-04-15 02:12:14 +02:00
|
|
|
key->partnatts,
|
|
|
|
values,
|
|
|
|
&equal);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The bound at bound_offset is less than or equal to the
|
|
|
|
* tuple value, so the bound at offset+1 is the upper
|
|
|
|
* bound of the partition we're looking for, if there
|
|
|
|
* actually exists one.
|
|
|
|
*/
|
2018-06-13 19:18:02 +02:00
|
|
|
part_index = boundinfo->indexes[bound_offset + 1];
|
2018-04-15 02:12:14 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
elog(ERROR, "unexpected partition strategy: %d",
|
|
|
|
(int) key->strategy);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* part_index < 0 means we failed to find a partition of this parent. Use
|
|
|
|
* the default partition, if there is one.
|
|
|
|
*/
|
|
|
|
if (part_index < 0)
|
2018-06-13 19:18:02 +02:00
|
|
|
part_index = boundinfo->default_index;
|
2018-04-15 02:12:14 +02:00
|
|
|
|
|
|
|
return part_index;
|
|
|
|
}
|
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
/*
|
2017-11-28 20:17:21 +01:00
|
|
|
* ExecBuildSlotPartitionKeyDescription
|
2017-11-15 16:23:28 +01:00
|
|
|
*
|
|
|
|
* This works very much like BuildIndexValueDescription() and is currently
|
|
|
|
* used for building error messages when ExecFindPartition() fails to find
|
|
|
|
* partition for a row.
|
|
|
|
*/
|
|
|
|
static char *
|
|
|
|
ExecBuildSlotPartitionKeyDescription(Relation rel,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull,
|
|
|
|
int maxfieldlen)
|
|
|
|
{
|
|
|
|
StringInfoData buf;
|
|
|
|
PartitionKey key = RelationGetPartitionKey(rel);
|
|
|
|
int partnatts = get_partition_natts(key);
|
|
|
|
int i;
|
|
|
|
Oid relid = RelationGetRelid(rel);
|
|
|
|
AclResult aclresult;
|
|
|
|
|
|
|
|
if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* If the user has table-level access, just go build the description. */
|
|
|
|
aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
|
|
|
|
if (aclresult != ACLCHECK_OK)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Step through the columns of the partition key and make sure the
|
|
|
|
* user has SELECT rights on all of them.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < partnatts; i++)
|
|
|
|
{
|
|
|
|
AttrNumber attnum = get_partition_col_attnum(key, i);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this partition key column is an expression, we return no
|
|
|
|
* detail rather than try to figure out what column(s) the
|
|
|
|
* expression includes and if the user has SELECT rights on them.
|
|
|
|
*/
|
|
|
|
if (attnum == InvalidAttrNumber ||
|
|
|
|
pg_attribute_aclcheck(relid, attnum, GetUserId(),
|
|
|
|
ACL_SELECT) != ACLCHECK_OK)
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
initStringInfo(&buf);
|
|
|
|
appendStringInfo(&buf, "(%s) = (",
|
|
|
|
pg_get_partkeydef_columns(relid, true));
|
|
|
|
|
|
|
|
for (i = 0; i < partnatts; i++)
|
|
|
|
{
|
|
|
|
char *val;
|
|
|
|
int vallen;
|
|
|
|
|
|
|
|
if (isnull[i])
|
|
|
|
val = "null";
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Oid foutoid;
|
|
|
|
bool typisvarlena;
|
|
|
|
|
|
|
|
getTypeOutputInfo(get_partition_col_typid(key, i),
|
|
|
|
&foutoid, &typisvarlena);
|
|
|
|
val = OidOutputFunctionCall(foutoid, values[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i > 0)
|
|
|
|
appendStringInfoString(&buf, ", ");
|
|
|
|
|
|
|
|
/* truncate if needed */
|
|
|
|
vallen = strlen(val);
|
|
|
|
if (vallen <= maxfieldlen)
|
|
|
|
appendStringInfoString(&buf, val);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
vallen = pg_mbcliplen(val, vallen, maxfieldlen);
|
|
|
|
appendBinaryStringInfo(&buf, val, vallen);
|
|
|
|
appendStringInfoString(&buf, "...");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
appendStringInfoChar(&buf, ')');
|
|
|
|
|
|
|
|
return buf.data;
|
|
|
|
}
|
2018-03-26 15:43:54 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* adjust_partition_tlist
|
|
|
|
* Adjust the targetlist entries for a given partition to account for
|
|
|
|
* attribute differences between parent and the partition
|
|
|
|
*
|
|
|
|
* The expressions have already been fixed, but here we fix the list to make
|
|
|
|
* target resnos match the partition's attribute numbers. This results in a
|
|
|
|
* copy of the original target list in which the entries appear in resno
|
|
|
|
* order, including both the existing entries (that may have their resno
|
|
|
|
* changed in-place) and the newly added entries for columns that don't exist
|
|
|
|
* in the parent.
|
|
|
|
*
|
|
|
|
* Scribbles on the input tlist, so callers must make sure to make a copy
|
|
|
|
* before passing it to us.
|
|
|
|
*/
|
|
|
|
static List *
|
|
|
|
adjust_partition_tlist(List *tlist, TupleConversionMap *map)
|
|
|
|
{
|
|
|
|
List *new_tlist = NIL;
|
|
|
|
TupleDesc tupdesc = map->outdesc;
|
|
|
|
AttrNumber *attrMap = map->attrMap;
|
|
|
|
AttrNumber attrno;
|
|
|
|
|
|
|
|
for (attrno = 1; attrno <= tupdesc->natts; attrno++)
|
|
|
|
{
|
|
|
|
Form_pg_attribute att_tup = TupleDescAttr(tupdesc, attrno - 1);
|
|
|
|
TargetEntry *tle;
|
|
|
|
|
|
|
|
if (attrMap[attrno - 1] != InvalidAttrNumber)
|
|
|
|
{
|
|
|
|
Assert(!att_tup->attisdropped);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the corresponding entry from the parent's tlist, adjusting
|
|
|
|
* the resno the match the partition's attno.
|
|
|
|
*/
|
|
|
|
tle = (TargetEntry *) list_nth(tlist, attrMap[attrno - 1] - 1);
|
|
|
|
tle->resno = attrno;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Const *expr;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For a dropped attribute in the partition, generate a dummy
|
|
|
|
* entry with resno matching the partition's attno.
|
|
|
|
*/
|
|
|
|
Assert(att_tup->attisdropped);
|
|
|
|
expr = makeConst(INT4OID,
|
|
|
|
-1,
|
|
|
|
InvalidOid,
|
|
|
|
sizeof(int32),
|
|
|
|
(Datum) 0,
|
|
|
|
true, /* isnull */
|
|
|
|
true /* byval */ );
|
|
|
|
tle = makeTargetEntry((Expr *) expr,
|
|
|
|
attrno,
|
|
|
|
pstrdup(NameStr(att_tup->attname)),
|
|
|
|
false);
|
|
|
|
}
|
|
|
|
|
|
|
|
new_tlist = lappend(new_tlist, tle);
|
|
|
|
}
|
|
|
|
|
|
|
|
return new_tlist;
|
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
* Run-Time Partition Pruning Support.
|
|
|
|
*
|
|
|
|
* The following series of functions exist to support the removal of unneeded
|
2018-06-11 00:24:34 +02:00
|
|
|
* subplans for queries against partitioned tables. The supporting functions
|
|
|
|
* here are designed to work with any plan type which supports an arbitrary
|
|
|
|
* number of subplans, e.g. Append, MergeAppend.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-10 21:22:25 +02:00
|
|
|
* When pruning involves comparison of a partition key to a constant, it's
|
|
|
|
* done by the planner. However, if we have a comparison to a non-constant
|
|
|
|
* but not volatile expression, that presents an opportunity for run-time
|
|
|
|
* pruning by the executor, allowing irrelevant partitions to be skipped
|
|
|
|
* dynamically.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-10 21:22:25 +02:00
|
|
|
* We must distinguish expressions containing PARAM_EXEC Params from
|
|
|
|
* expressions that don't contain those. Even though a PARAM_EXEC Param is
|
2018-06-11 00:24:34 +02:00
|
|
|
* considered to be a stable expression, it can change value from one plan
|
|
|
|
* node scan to the next during query execution. Stable comparison
|
|
|
|
* expressions that don't involve such Params allow partition pruning to be
|
|
|
|
* done once during executor startup. Expressions that do involve such Params
|
|
|
|
* require us to prune separately for each scan of the parent plan node.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-11 00:24:34 +02:00
|
|
|
* Note that pruning away unneeded subplans during executor startup has the
|
|
|
|
* added benefit of not having to initialize the unneeded subplans at all.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Alternatively, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* Functions:
|
|
|
|
*
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
* ExecCreatePartitionPruneState:
|
2018-06-11 00:24:34 +02:00
|
|
|
* Creates the PartitionPruneState required by each of the two pruning
|
|
|
|
* functions. Details stored include how to map the partition index
|
|
|
|
* returned by the partition pruning code into subplan indexes.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Alternatively, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
|
|
|
* ExecFindInitialMatchingSubPlans:
|
2018-06-11 00:24:34 +02:00
|
|
|
* Returns indexes of matching subplans. Partition pruning is attempted
|
2018-06-10 21:22:25 +02:00
|
|
|
* without any evaluation of expressions containing PARAM_EXEC Params.
|
2018-06-11 00:24:34 +02:00
|
|
|
* This function must be called during executor startup for the parent
|
|
|
|
* plan before the subplans themselves are initialized. Subplans which
|
|
|
|
* are found not to match by this function must be removed from the
|
|
|
|
* plan's list of subplans during execution, as this function performs a
|
|
|
|
* remap of the partition index to subplan index map and the newly
|
|
|
|
* created map provides indexes only for subplans which remain after
|
|
|
|
* calling this function.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Alternatively, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
|
|
|
* ExecFindMatchingSubPlans:
|
2018-06-11 00:24:34 +02:00
|
|
|
* Returns indexes of matching subplans after evaluating all available
|
|
|
|
* expressions. This function can only be called during execution and
|
|
|
|
* must be called again each time the value of a Param listed in
|
|
|
|
* PartitionPruneState's 'execparamids' changes.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Alternatively, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
* ExecCreatePartitionPruneState
|
|
|
|
* Build the data structure required for calling
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Alternatively, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
* ExecFindInitialMatchingSubPlans and ExecFindMatchingSubPlans.
|
|
|
|
*
|
2018-06-11 00:24:34 +02:00
|
|
|
* 'planstate' is the parent plan node's execution state.
|
|
|
|
*
|
2018-08-02 01:42:46 +02:00
|
|
|
* 'partitionpruneinfo' is a PartitionPruneInfo as generated by
|
2018-06-11 00:24:34 +02:00
|
|
|
* make_partition_pruneinfo. Here we build a PartitionPruneState containing a
|
2018-08-02 01:42:46 +02:00
|
|
|
* PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
|
|
|
|
* partitionpruneinfo->prune_infos), each of which contains a
|
|
|
|
* PartitionedRelPruningData for each PartitionedRelPruneInfo appearing in
|
|
|
|
* that sublist. This two-level system is needed to keep from confusing the
|
|
|
|
* different hierarchies when a UNION ALL contains multiple partitioned tables
|
|
|
|
* as children. The data stored in each PartitionedRelPruningData can be
|
|
|
|
* re-used each time we re-evaluate which partitions match the pruning steps
|
|
|
|
* provided in each PartitionedRelPruneInfo.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Alternatively, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
PartitionPruneState *
|
2018-08-02 01:42:46 +02:00
|
|
|
ExecCreatePartitionPruneState(PlanState *planstate,
|
|
|
|
PartitionPruneInfo *partitionpruneinfo)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Alternatively, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-10-04 20:03:37 +02:00
|
|
|
EState *estate = planstate->state;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Alternatively, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
PartitionPruneState *prunestate;
|
2018-08-02 01:42:46 +02:00
|
|
|
int n_part_hierarchies;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Alternatively, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
ListCell *lc;
|
|
|
|
int i;
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
n_part_hierarchies = list_length(partitionpruneinfo->prune_infos);
|
|
|
|
Assert(n_part_hierarchies > 0);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are eliminated
during this stage of pruning are still visible in the EXPLAIN output.
In order to determine if pruning has actually taken place, the EXPLAIN
ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in the
EXPLAIN ANALYZE output for certain subplans may appear lower than for
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-06-11 00:24:34 +02:00
|
|
|
/*
|
|
|
|
* Allocate the data structure
|
|
|
|
*/
|
2018-08-02 01:42:46 +02:00
|
|
|
prunestate = (PartitionPruneState *)
|
|
|
|
palloc(offsetof(PartitionPruneState, partprunedata) +
|
|
|
|
sizeof(PartitionPruningData *) * n_part_hierarchies);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are eliminated
during this stage of pruning are still visible in the EXPLAIN output.
In order to determine if pruning has actually taken place, the EXPLAIN
ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in the
EXPLAIN ANALYZE output for certain subplans may appear lower than for
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
prunestate->execparamids = NULL;
|
|
|
|
/* other_subplans can change at runtime, so we need our own copy */
|
|
|
|
prunestate->other_subplans = bms_copy(partitionpruneinfo->other_subplans);
|
2018-06-10 21:22:25 +02:00
|
|
|
prunestate->do_initial_prune = false; /* may be set below */
|
|
|
|
prunestate->do_exec_prune = false; /* may be set below */
|
2018-08-02 01:42:46 +02:00
|
|
|
prunestate->num_partprunedata = n_part_hierarchies;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are eliminated
during this stage of pruning are still visible in the EXPLAIN output.
In order to determine if pruning has actually taken place, the EXPLAIN
ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in the
EXPLAIN ANALYZE output for certain subplans may appear lower than for
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
/*
|
2018-06-11 00:24:34 +02:00
|
|
|
* Create a short-term memory context which we'll use when making calls to
|
|
|
|
* the partition pruning functions. This avoids possible memory leaks,
|
|
|
|
* since the pruning functions call comparison functions that aren't under
|
|
|
|
* our control.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are eliminated
during this stage of pruning are still visible in the EXPLAIN output.
In order to determine if pruning has actually taken place, the EXPLAIN
ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in the
EXPLAIN ANALYZE output for certain subplans may appear lower than for
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
prunestate->prune_context =
|
|
|
|
AllocSetContextCreate(CurrentMemoryContext,
|
|
|
|
"Partition Prune",
|
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
|
|
|
|
i = 0;
|
2018-08-02 01:42:46 +02:00
|
|
|
foreach(lc, partitionpruneinfo->prune_infos)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are eliminated
during this stage of pruning are still visible in the EXPLAIN output.
In order to determine if pruning has actually taken place, the EXPLAIN
ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in the
EXPLAIN ANALYZE output for certain subplans may appear lower than for
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
List *partrelpruneinfos = lfirst_node(List, lc);
|
|
|
|
int npartrelpruneinfos = list_length(partrelpruneinfos);
|
|
|
|
PartitionPruningData *prunedata;
|
Fix up run-time partition pruning's use of relcache's partition data.
The previous coding saved pointers into the partitioned table's relcache
entry, but then closed the relcache entry, causing those pointers to
nominally become dangling. Actual trouble would be seen in the field
only if a relcache flush occurred mid-query, but that's hardly out of
the question.
While we could fix this by copying all the data in question at query
start, it seems better to just hold the relcache entry open for the
whole query.
While at it, improve the handling of support-function lookups: do that
once per query not once per pruning test. There's still something to be
desired here, in that we fail to exploit the possibility of caching data
across queries in the fn_extra fields of the relcache's FmgrInfo structs,
which could happen if we just used those structs in-place rather than
copying them. However, combining that with the possibility of per-query
lookups of cross-type comparison functions seems to require changes in the
APIs of a lot of the pruning support functions, so it's too invasive to
consider as part of this patch. A win would ensue only for complex
partition key data types (e.g. arrays), so it may not be worth the
trouble.
David Rowley and Tom Lane
Discussion: https://postgr.es/m/17850.1528755844@sss.pgh.pa.us
2018-06-13 18:03:19 +02:00
|
|
|
ListCell *lc2;
|
2018-08-02 01:42:46 +02:00
|
|
|
int j;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are eliminated
during this stage of pruning are still visible in the EXPLAIN output.
In order to determine if pruning has actually taken place, the EXPLAIN
ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in the
EXPLAIN ANALYZE output for certain subplans may appear lower than for
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
prunedata = (PartitionPruningData *)
|
|
|
|
palloc(offsetof(PartitionPruningData, partrelprunedata) +
|
|
|
|
npartrelpruneinfos * sizeof(PartitionedRelPruningData));
|
|
|
|
prunestate->partprunedata[i] = prunedata;
|
|
|
|
prunedata->num_partrelprunedata = npartrelpruneinfos;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are eliminated
during this stage of pruning are still visible in the EXPLAIN output.
In order to determine if pruning has actually taken place, the EXPLAIN
ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in the
EXPLAIN ANALYZE output for certain subplans may appear lower than for
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
j = 0;
|
|
|
|
foreach(lc2, partrelpruneinfos)
|
2018-04-24 19:03:10 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2);
|
|
|
|
PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
|
|
|
|
PartitionPruneContext *context = &pprune->context;
|
2018-10-04 20:03:37 +02:00
|
|
|
Relation partrel;
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionDesc partdesc;
|
|
|
|
PartitionKey partkey;
|
|
|
|
int partnatts;
|
|
|
|
int n_steps;
|
2018-04-24 19:03:10 +02:00
|
|
|
ListCell *lc3;
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* We must copy the subplan_map rather than pointing directly to
|
|
|
|
* the plan's version, as we may end up making modifications to it
|
|
|
|
* later.
|
|
|
|
*/
|
|
|
|
pprune->subplan_map = palloc(sizeof(int) * pinfo->nparts);
|
|
|
|
memcpy(pprune->subplan_map, pinfo->subplan_map,
|
|
|
|
sizeof(int) * pinfo->nparts);
|
2018-04-24 19:03:10 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* We can use the subpart_map verbatim, since we never modify it */
|
|
|
|
pprune->subpart_map = pinfo->subpart_map;
|
2018-04-24 19:03:10 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* present_parts is also subject to later modification */
|
|
|
|
pprune->present_parts = bms_copy(pinfo->present_parts);
|
|
|
|
|
|
|
|
/*
|
2018-10-04 20:03:37 +02:00
|
|
|
* We can rely on the copies of the partitioned table's partition
|
|
|
|
* key and partition descriptor appearing in its relcache entry,
|
|
|
|
* because that entry will be held open and locked for the
|
|
|
|
* duration of this executor run.
|
2018-08-02 01:42:46 +02:00
|
|
|
*/
|
2018-10-04 20:03:37 +02:00
|
|
|
partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex);
|
|
|
|
partkey = RelationGetPartitionKey(partrel);
|
|
|
|
partdesc = RelationGetPartitionDesc(partrel);
|
2018-08-02 01:42:46 +02:00
|
|
|
|
|
|
|
n_steps = list_length(pinfo->pruning_steps);
|
|
|
|
|
|
|
|
context->strategy = partkey->strategy;
|
|
|
|
context->partnatts = partnatts = partkey->partnatts;
|
|
|
|
context->nparts = pinfo->nparts;
|
|
|
|
context->boundinfo = partdesc->boundinfo;
|
|
|
|
context->partcollation = partkey->partcollation;
|
|
|
|
context->partsupfunc = partkey->partsupfunc;
|
|
|
|
|
|
|
|
/* We'll look up type-specific support functions as needed */
|
|
|
|
context->stepcmpfuncs = (FmgrInfo *)
|
|
|
|
palloc0(sizeof(FmgrInfo) * n_steps * partnatts);
|
|
|
|
|
|
|
|
context->ppccontext = CurrentMemoryContext;
|
|
|
|
context->planstate = planstate;
|
|
|
|
|
|
|
|
/* Initialize expression state for each expression we need */
|
|
|
|
context->exprstates = (ExprState **)
|
|
|
|
palloc0(sizeof(ExprState *) * n_steps * partnatts);
|
|
|
|
foreach(lc3, pinfo->pruning_steps)
|
2018-04-24 19:03:10 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc3);
|
|
|
|
ListCell *lc4;
|
|
|
|
int keyno;
|
|
|
|
|
|
|
|
/* not needed for other step kinds */
|
|
|
|
if (!IsA(step, PartitionPruneStepOp))
|
|
|
|
continue;
|
2018-04-24 19:03:10 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
Assert(list_length(step->exprs) <= partnatts);
|
|
|
|
|
|
|
|
keyno = 0;
|
|
|
|
foreach(lc4, step->exprs)
|
2018-04-24 19:03:10 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
Expr *expr = (Expr *) lfirst(lc4);
|
|
|
|
|
|
|
|
/* not needed for Consts */
|
|
|
|
if (!IsA(expr, Const))
|
|
|
|
{
|
|
|
|
int stateidx = PruneCxtStateIdx(partnatts,
|
|
|
|
step->step.step_id,
|
|
|
|
keyno);
|
2018-06-10 21:22:25 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
context->exprstates[stateidx] =
|
|
|
|
ExecInitExpr(expr, context->planstate);
|
|
|
|
}
|
|
|
|
keyno++;
|
2018-04-24 19:03:10 +02:00
|
|
|
}
|
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". Whereas, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* Array is not modified at runtime, so just point to plan's copy */
|
|
|
|
context->exprhasexecparam = pinfo->hasexecparam;
|
2018-06-10 21:22:25 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
pprune->pruning_steps = pinfo->pruning_steps;
|
|
|
|
pprune->do_initial_prune = pinfo->do_initial_prune;
|
|
|
|
pprune->do_exec_prune = pinfo->do_exec_prune;
|
2018-06-10 21:22:25 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* Record if pruning would be useful at any level */
|
|
|
|
prunestate->do_initial_prune |= pinfo->do_initial_prune;
|
|
|
|
prunestate->do_exec_prune |= pinfo->do_exec_prune;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". Whereas, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* Accumulate the IDs of all PARAM_EXEC Params affecting the
|
|
|
|
* partitioning decisions at this plan node.
|
|
|
|
*/
|
|
|
|
prunestate->execparamids = bms_add_members(prunestate->execparamids,
|
|
|
|
pinfo->execparamids);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". Whereas, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
j++;
|
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". Whereas, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return prunestate;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExecFindInitialMatchingSubPlans
|
2018-06-11 00:24:34 +02:00
|
|
|
* Identify the set of subplans that cannot be eliminated by initial
|
2018-11-15 19:34:16 +01:00
|
|
|
* pruning, disregarding any pruning constraints involving PARAM_EXEC
|
|
|
|
* Params.
|
|
|
|
*
|
|
|
|
* If additional pruning passes will be required (because of PARAM_EXEC
|
|
|
|
* Params), we must also update the translation data that allows conversion
|
|
|
|
* of partition indexes into subplan indexes to account for the unneeded
|
|
|
|
* subplans having been removed.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". Whereas, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-11 00:24:34 +02:00
|
|
|
* Must only be called once per 'prunestate', and only if initial pruning
|
|
|
|
* is required.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". Whereas, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-11 00:24:34 +02:00
|
|
|
* 'nsubplans' must be passed as the total number of unpruned subplans.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". Whereas, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
Bitmapset *
|
2018-06-11 00:24:34 +02:00
|
|
|
ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". Whereas, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exists in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
|
|
|
Bitmapset *result = NULL;
|
2018-08-02 01:42:46 +02:00
|
|
|
MemoryContext oldcontext;
|
|
|
|
int i;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is
because the list of matching subnodes has to be re-evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-11-15 19:34:16 +01:00
|
|
|
/* Caller error if we get here without do_initial_prune */
|
2018-06-10 21:22:25 +02:00
|
|
|
Assert(prunestate->do_initial_prune);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is
because the list of matching subnodes has to be re-evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Switch to a temp context to avoid leaking memory in the executor's
|
2018-11-15 19:34:16 +01:00
|
|
|
* query-lifespan memory context.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is
because the list of matching subnodes has to be re-evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
2018-11-15 19:34:16 +01:00
|
|
|
* For each hierarchy, do the pruning tests, and add nondeletable
|
|
|
|
* subplans' indexes to "result".
|
2018-08-02 01:42:46 +02:00
|
|
|
*/
|
|
|
|
for (i = 0; i < prunestate->num_partprunedata; i++)
|
|
|
|
{
|
|
|
|
PartitionPruningData *prunedata;
|
|
|
|
PartitionedRelPruningData *pprune;
|
|
|
|
|
|
|
|
prunedata = prunestate->partprunedata[i];
|
|
|
|
pprune = &prunedata->partrelprunedata[0];
|
|
|
|
|
|
|
|
/* Perform pruning without using PARAM_EXEC Params */
|
|
|
|
find_matching_subplans_recurse(prunedata, pprune, true, &result);
|
|
|
|
|
|
|
|
/* Expression eval may have used space in node's ps_ExprContext too */
|
|
|
|
ResetExprContext(pprune->context.planstate->ps_ExprContext);
|
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is
because the list of matching subnodes has to be re-evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-11-15 19:34:16 +01:00
|
|
|
/* Add in any subplans that partition pruning didn't account for */
|
|
|
|
result = bms_add_members(result, prunestate->other_subplans);
|
|
|
|
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is
because the list of matching subnodes has to be re-evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
|
2018-06-10 21:22:25 +02:00
|
|
|
/* Copy result out of the temp context before we reset it */
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is
because the list of matching subnodes has to be re-evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
result = bms_copy(result);
|
|
|
|
|
|
|
|
MemoryContextReset(prunestate->prune_context);
|
|
|
|
|
|
|
|
/*
|
2018-11-15 19:34:16 +01:00
|
|
|
* If exec-time pruning is required and we pruned subplans above, then we
|
|
|
|
* must re-sequence the subplan indexes so that ExecFindMatchingSubPlans
|
|
|
|
* properly returns the indexes from the subplans which will remain after
|
|
|
|
* execution of this function.
|
|
|
|
*
|
|
|
|
* We can safely skip this when !do_exec_prune, even though that leaves
|
|
|
|
* invalid data in prunestate, because that data won't be consulted again
|
|
|
|
* (cf initial Assert in ExecFindMatchingSubPlans).
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is
because the list of matching subnodes has to be re-evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
2018-11-15 19:34:16 +01:00
|
|
|
if (prunestate->do_exec_prune && bms_num_members(result) < nsubplans)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is
because the list of matching subnodes has to be re-evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-06-11 00:24:34 +02:00
|
|
|
int *new_subplan_indexes;
|
2018-08-02 01:42:46 +02:00
|
|
|
Bitmapset *new_other_subplans;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution,
the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was
never executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
for others due to the subplan having been scanned fewer times. This is
because the list of matching subnodes has to be re-evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
int i;
|
|
|
|
int newidx;
|
|
|
|
|
|
|
|
/*
|
2018-06-11 23:35:43 +02:00
|
|
|
* First we must build a temporary array which maps old subplan
|
2018-11-15 19:34:16 +01:00
|
|
|
* indexes to new ones. For convenience of initialization, we use
|
|
|
|
* 1-based indexes in this array and leave pruned items as 0.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
2018-11-15 19:34:16 +01:00
|
|
|
new_subplan_indexes = (int *) palloc0(sizeof(int) * nsubplans);
|
|
|
|
newidx = 1;
|
|
|
|
i = -1;
|
|
|
|
while ((i = bms_next_member(result, i)) >= 0)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-11-15 19:34:16 +01:00
|
|
|
Assert(i < nsubplans);
|
|
|
|
new_subplan_indexes[i] = newidx++;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2018-08-02 01:42:46 +02:00
|
|
|
* Now we can update each PartitionedRelPruneInfo's subplan_map with
|
|
|
|
* new subplan indexes. We must also recompute its present_parts
|
|
|
|
* bitmap.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
2018-08-02 01:42:46 +02:00
|
|
|
for (i = 0; i < prunestate->num_partprunedata; i++)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionPruningData *prunedata = prunestate->partprunedata[i];
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
int j;
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* Within each hierarchy, we perform this loop in back-to-front
|
|
|
|
* order so that we determine present_parts for the lowest-level
|
|
|
|
* partitioned tables first. This way we can tell whether a
|
|
|
|
* sub-partitioned table's partitions were entirely pruned so we
|
2018-11-15 19:34:16 +01:00
|
|
|
* can exclude it from the current level's present_parts.
|
2018-08-02 01:42:46 +02:00
|
|
|
*/
|
|
|
|
for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
|
|
|
|
int nparts = pprune->context.nparts;
|
|
|
|
int k;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/* We just rebuild present_parts from scratch */
|
|
|
|
bms_free(pprune->present_parts);
|
|
|
|
pprune->present_parts = NULL;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
for (k = 0; k < nparts; k++)
|
2018-06-10 21:22:25 +02:00
|
|
|
{
|
2018-08-02 01:42:46 +02:00
|
|
|
int oldidx = pprune->subplan_map[k];
|
|
|
|
int subidx;
|
2018-06-10 21:22:25 +02:00
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
|
|
|
* If this partition existed as a subplan then change the
|
|
|
|
* old subplan index to the new subplan index. The new
|
|
|
|
* index may become -1 if the partition was pruned above,
|
|
|
|
* or it may just come earlier in the subplan list due to
|
|
|
|
* some subplans being removed earlier in the list. If
|
|
|
|
* it's a subpartition, add it to present_parts unless
|
|
|
|
* it's entirely pruned.
|
|
|
|
*/
|
|
|
|
if (oldidx >= 0)
|
|
|
|
{
|
|
|
|
Assert(oldidx < nsubplans);
|
2018-11-15 19:34:16 +01:00
|
|
|
pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
|
2018-06-10 21:22:25 +02:00
|
|
|
|
2018-11-15 19:34:16 +01:00
|
|
|
if (new_subplan_indexes[oldidx] > 0)
|
2018-08-02 01:42:46 +02:00
|
|
|
pprune->present_parts =
|
|
|
|
bms_add_member(pprune->present_parts, k);
|
|
|
|
}
|
|
|
|
else if ((subidx = pprune->subpart_map[k]) >= 0)
|
|
|
|
{
|
|
|
|
PartitionedRelPruningData *subprune;
|
|
|
|
|
|
|
|
subprune = &prunedata->partrelprunedata[subidx];
|
|
|
|
|
|
|
|
if (!bms_is_empty(subprune->present_parts))
|
|
|
|
pprune->present_parts =
|
|
|
|
bms_add_member(pprune->present_parts, k);
|
|
|
|
}
|
2018-06-10 21:22:25 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-15 19:34:16 +01:00
|
|
|
/*
|
|
|
|
* We must also recompute the other_subplans set, since indexes in it
|
|
|
|
* may change.
|
|
|
|
*/
|
|
|
|
new_other_subplans = NULL;
|
|
|
|
i = -1;
|
|
|
|
while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
|
|
|
|
new_other_subplans = bms_add_member(new_other_subplans,
|
|
|
|
new_subplan_indexes[i] - 1);
|
|
|
|
|
|
|
|
bms_free(prunestate->other_subplans);
|
|
|
|
prunestate->other_subplans = new_other_subplans;
|
|
|
|
|
2018-06-11 00:24:34 +02:00
|
|
|
pfree(new_subplan_indexes);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during actual execution, the
pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition, then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExecFindMatchingSubPlans
|
2018-04-09 16:17:12 +02:00
|
|
|
* Determine which subplans match the pruning steps detailed in
|
2018-06-11 00:24:34 +02:00
|
|
|
* 'prunestate' for the current comparison expression values.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*
|
2018-06-10 21:22:25 +02:00
|
|
|
* Here we assume we may evaluate PARAM_EXEC Params.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
Bitmapset *
|
|
|
|
ExecFindMatchingSubPlans(PartitionPruneState *prunestate)
|
|
|
|
{
|
|
|
|
Bitmapset *result = NULL;
|
2018-08-02 01:42:46 +02:00
|
|
|
MemoryContext oldcontext;
|
|
|
|
int i;
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-11-15 19:34:16 +01:00
|
|
|
/*
|
|
|
|
* If !do_exec_prune, we've got problems because
|
|
|
|
* ExecFindInitialMatchingSubPlans will not have bothered to update
|
|
|
|
* prunestate for whatever pruning it did.
|
|
|
|
*/
|
|
|
|
Assert(prunestate->do_exec_prune);
|
|
|
|
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
/*
|
|
|
|
* Switch to a temp context to avoid leaking memory in the executor's
|
2018-11-15 19:34:16 +01:00
|
|
|
* query-lifespan memory context.
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
|
|
|
|
|
2018-08-02 01:42:46 +02:00
|
|
|
/*
|
2018-11-15 19:34:16 +01:00
|
|
|
* For each hierarchy, do the pruning tests, and add nondeletable
|
|
|
|
* subplans' indexes to "result".
|
2018-08-02 01:42:46 +02:00
|
|
|
*/
|
|
|
|
for (i = 0; i < prunestate->num_partprunedata; i++)
|
|
|
|
{
|
|
|
|
PartitionPruningData *prunedata;
|
|
|
|
PartitionedRelPruningData *pprune;
|
|
|
|
|
|
|
|
prunedata = prunestate->partprunedata[i];
|
|
|
|
pprune = &prunedata->partrelprunedata[0];
|
|
|
|
|
|
|
|
find_matching_subplans_recurse(prunedata, pprune, false, &result);
|
|
|
|
|
|
|
|
/* Expression eval may have used space in node's ps_ExprContext too */
|
|
|
|
ResetExprContext(pprune->context.planstate->ps_ExprContext);
|
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-11-15 19:34:16 +01:00
|
|
|
/* Add in any subplans that partition pruning didn't account for */
|
|
|
|
result = bms_add_members(result, prunestate->other_subplans);
|
|
|
|
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
|
2018-06-10 21:22:25 +02:00
|
|
|
/* Copy result out of the temp context before we reset it */
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
result = bms_copy(result);
|
|
|
|
|
|
|
|
MemoryContextReset(prunestate->prune_context);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2018-06-10 21:22:25 +02:00
|
|
|
* find_matching_subplans_recurse
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
* Recursive worker function for ExecFindMatchingSubPlans and
|
|
|
|
* ExecFindInitialMatchingSubPlans
|
2018-06-10 21:22:25 +02:00
|
|
|
*
|
|
|
|
* Adds valid (non-prunable) subplan IDs to *validsubplans
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*/
|
|
|
|
static void
|
2018-08-02 01:42:46 +02:00
|
|
|
find_matching_subplans_recurse(PartitionPruningData *prunedata,
|
|
|
|
PartitionedRelPruningData *pprune,
|
2018-06-10 21:22:25 +02:00
|
|
|
bool initial_prune,
|
|
|
|
Bitmapset **validsubplans)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
|
|
|
Bitmapset *partset;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Guard against stack overflow due to overly deep partition hierarchy. */
|
|
|
|
check_stack_depth();
|
|
|
|
|
2018-06-10 21:22:25 +02:00
|
|
|
/* Only prune if pruning would be useful at this level. */
|
|
|
|
if (initial_prune ? pprune->do_initial_prune : pprune->do_exec_prune)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
{
|
2018-06-10 21:22:25 +02:00
|
|
|
PartitionPruneContext *context = &pprune->context;
|
|
|
|
|
|
|
|
/* Set whether we can evaluate PARAM_EXEC Params or not */
|
|
|
|
context->evalexecparams = !initial_prune;
|
|
|
|
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
partset = get_matching_partitions(context,
|
|
|
|
pprune->pruning_steps);
|
|
|
|
}
|
|
|
|
else
|
2018-06-10 21:22:25 +02:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If no pruning is to be done, just include all partitions at this
|
|
|
|
* level.
|
|
|
|
*/
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
partset = pprune->present_parts;
|
2018-06-10 21:22:25 +02:00
|
|
|
}
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
2018-06-11 00:24:34 +02:00
|
|
|
/* Translate partset into subplan indexes */
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
i = -1;
|
|
|
|
while ((i = bms_next_member(partset, i)) >= 0)
|
|
|
|
{
|
2018-06-11 00:24:34 +02:00
|
|
|
if (pprune->subplan_map[i] >= 0)
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
*validsubplans = bms_add_member(*validsubplans,
|
2018-06-11 00:24:34 +02:00
|
|
|
pprune->subplan_map[i]);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
int partidx = pprune->subpart_map[i];
|
|
|
|
|
2018-06-11 00:24:34 +02:00
|
|
|
if (partidx >= 0)
|
2018-08-02 01:42:46 +02:00
|
|
|
find_matching_subplans_recurse(prunedata,
|
|
|
|
&prunedata->partrelprunedata[partidx],
|
2018-06-10 21:22:25 +02:00
|
|
|
initial_prune, validsubplans);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
else
|
|
|
|
{
|
2018-08-16 17:43:04 +02:00
|
|
|
/*
|
|
|
|
* We get here if the planner already pruned all the sub-
|
|
|
|
* partitions for this partition. Silently ignore this
|
|
|
|
* partition in this case. The end result is the same: we
|
|
|
|
* would have pruned all partitions just the same, but we
|
|
|
|
* don't have any pruning steps to execute to verify this.
|
|
|
|
*/
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". In other cases, for example
with parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|