2017-11-15 16:23:28 +01:00
|
|
|
/*--------------------------------------------------------------------
|
|
|
|
* execPartition.h
|
|
|
|
* POSTGRES partitioning executor interface
|
|
|
|
*
|
2018-01-03 05:30:12 +01:00
|
|
|
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
2017-11-15 16:23:28 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/include/executor/execPartition.h
|
|
|
|
*--------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef EXECPARTITION_H
|
|
|
|
#define EXECPARTITION_H
|
|
|
|
|
|
|
|
#include "catalog/partition.h"
|
|
|
|
#include "nodes/execnodes.h"
|
|
|
|
#include "nodes/parsenodes.h"
|
|
|
|
#include "nodes/plannodes.h"
|
|
|
|
|
|
|
|
/*-----------------------
|
|
|
|
* PartitionDispatch - information about one partitioned table in a partition
|
|
|
|
* hierarchy required to route a tuple to one of its partitions
|
|
|
|
*
|
|
|
|
* reldesc Relation descriptor of the table
|
|
|
|
* key Partition key information of the table
|
|
|
|
* keystate Execution state required for expressions in the partition key
|
|
|
|
* partdesc Partition descriptor of the table
|
|
|
|
* tupslot A standalone TupleTableSlot initialized with this table's tuple
|
|
|
|
* descriptor
|
|
|
|
* tupmap TupleConversionMap to convert from the parent's rowtype to
|
|
|
|
* this table's rowtype (when extracting the partition key of a
|
|
|
|
* tuple just before routing it through this table)
|
|
|
|
* indexes Array with partdesc->nparts members (for details on what
|
|
|
|
* individual members represent, see how they are set in
|
|
|
|
* get_partition_dispatch_recurse())
|
|
|
|
*-----------------------
|
|
|
|
*/
|
|
|
|
typedef struct PartitionDispatchData
|
|
|
|
{
|
2017-11-29 15:24:24 +01:00
|
|
|
Relation reldesc;
|
|
|
|
PartitionKey key;
|
|
|
|
List *keystate; /* list of ExprState */
|
|
|
|
PartitionDesc partdesc;
|
2017-11-15 16:23:28 +01:00
|
|
|
TupleTableSlot *tupslot;
|
|
|
|
TupleConversionMap *tupmap;
|
2017-11-29 15:24:24 +01:00
|
|
|
int *indexes;
|
2017-11-15 16:23:28 +01:00
|
|
|
} PartitionDispatchData;
|
|
|
|
|
|
|
|
/* PartitionDispatch is a pointer to a PartitionDispatchData struct. */
typedef struct PartitionDispatchData *PartitionDispatch;
|
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
/*-----------------------
|
|
|
|
* PartitionTupleRouting - Encapsulates all information required to execute
|
|
|
|
* tuple-routing between partitions.
|
|
|
|
*
|
|
|
|
* partition_dispatch_info Array of PartitionDispatch objects with one
|
|
|
|
* entry for every partitioned table in the
|
|
|
|
* partition tree.
|
|
|
|
* num_dispatch number of partitioned tables in the partition
|
|
|
|
* tree (= length of partition_dispatch_info[])
|
2018-02-26 21:05:46 +01:00
|
|
|
* partition_oids Array of leaf partitions OIDs with one entry
|
|
|
|
* for every leaf partition in the partition tree,
|
|
|
|
* initialized in full by
|
|
|
|
* ExecSetupPartitionTupleRouting.
|
2018-01-04 21:48:15 +01:00
|
|
|
* partitions Array of ResultRelInfo* objects with one entry
|
2018-02-26 21:05:46 +01:00
|
|
|
* for every leaf partition in the partition tree,
|
|
|
|
* initialized lazily by ExecInitPartitionInfo.
|
2018-01-04 21:48:15 +01:00
|
|
|
* num_partitions Number of leaf partitions in the partition tree
|
2018-02-26 21:05:46 +01:00
|
|
|
* (= 'partitions_oid'/'partitions' array length)
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
* parent_child_tupconv_maps Array of TupleConversionMap objects with one
|
2018-01-04 21:48:15 +01:00
|
|
|
* entry for every leaf partition (required to
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
* convert tuple from the root table's rowtype to
|
|
|
|
* a leaf partition's rowtype after tuple routing
|
|
|
|
* is done)
|
|
|
|
* child_parent_tupconv_maps Array of TupleConversionMap objects with one
|
|
|
|
* entry for every leaf partition (required to
|
|
|
|
* convert an updated tuple from the leaf
|
|
|
|
* partition's rowtype to the root table's rowtype
|
|
|
|
* so that tuple routing can be done)
|
|
|
|
* child_parent_map_not_required Array of bool. True value means that a map is
|
|
|
|
* determined to be not required for the given
|
|
|
|
* partition. False means either we haven't yet
|
|
|
|
* checked if a map is required, or it was
|
|
|
|
* determined to be required.
|
|
|
|
* subplan_partition_offsets Integer array ordered by UPDATE subplans. Each
|
|
|
|
* element of this array has the index into the
|
|
|
|
* corresponding partition in partitions array.
|
2018-01-24 22:34:51 +01:00
|
|
|
* num_subplan_partition_offsets Length of 'subplan_partition_offsets' array
|
2018-01-04 21:48:15 +01:00
|
|
|
* partition_tuple_slot TupleTableSlot to be used to manipulate any
|
|
|
|
* given leaf partition's rowtype after that
|
|
|
|
* partition is chosen for insertion by
|
|
|
|
* tuple-routing.
|
|
|
|
*-----------------------
|
|
|
|
*/
|
|
|
|
typedef struct PartitionTupleRouting
|
|
|
|
{
|
|
|
|
PartitionDispatch *partition_dispatch_info;
|
|
|
|
int num_dispatch;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
Oid *partition_oids;
|
2018-01-04 21:48:15 +01:00
|
|
|
ResultRelInfo **partitions;
|
|
|
|
int num_partitions;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
TupleConversionMap **parent_child_tupconv_maps;
|
|
|
|
TupleConversionMap **child_parent_tupconv_maps;
|
|
|
|
bool *child_parent_map_not_required;
|
|
|
|
int *subplan_partition_offsets;
|
2018-01-24 22:34:51 +01:00
|
|
|
int num_subplan_partition_offsets;
|
2018-01-04 21:48:15 +01:00
|
|
|
TupleTableSlot *partition_tuple_slot;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
TupleTableSlot *root_tuple_slot;
|
2018-01-04 21:48:15 +01:00
|
|
|
} PartitionTupleRouting;
|
|
|
|
|
|
|
|
extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
Relation rel);
|
2017-11-15 16:23:28 +01:00
|
|
|
extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
|
|
|
|
PartitionDispatch *pd,
|
|
|
|
TupleTableSlot *slot,
|
|
|
|
EState *estate);
|
MERGE SQL Command following SQL:2016
MERGE performs actions that modify rows in the target table
using a source table or query. MERGE provides a single SQL
statement that can conditionally INSERT/UPDATE/DELETE rows,
a task that would otherwise require multiple PL statements.
e.g.
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE works with regular and partitioned tables, including
column and row security enforcement, as well as support for
row, statement and transition triggers.
MERGE is optimized for OLTP and is parameterizable, though
also useful for large scale ETL/ELT. MERGE is not intended
to be used in preference to existing single SQL commands
for INSERT, UPDATE or DELETE since there is some overhead.
MERGE can be used statically from PL/pgSQL.
MERGE does not yet support inheritance, write rules,
RETURNING clauses, updatable views or foreign tables.
MERGE follows SQL Standard per the most recent SQL:2016.
Includes full tests and documentation, including full
isolation tests to demonstrate the concurrent behavior.
This version written from scratch in 2017 by Simon Riggs,
using docs and tests originally written in 2009. Later work
from Pavan Deolasee has been both complex and deep, leaving
the lead author credit now in his hands.
Extensive discussion of concurrency from Peter Geoghegan,
with thanks for the time and effort contributed.
Various issues reported via sqlsmith by Andreas Seltenreich
Authors: Pavan Deolasee, Simon Riggs
Reviewer: Peter Geoghegan, Amit Langote, Tomas Vondra, Simon Riggs
Discussion:
https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com
https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com
2018-04-03 10:28:16 +02:00
|
|
|
/*
 * ExecFindPartitionByOid
 *		NOTE(review): presumably looks up 'partoid' in proute->partition_oids
 *		and returns its array index -- confirm against the definition,
 *		including what is returned when the OID is not found.
 */
extern int ExecFindPartitionByOid(PartitionTupleRouting *proute, Oid partoid);
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
extern ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
|
|
|
|
ResultRelInfo *resultRelInfo,
|
|
|
|
PartitionTupleRouting *proute,
|
|
|
|
EState *estate, int partidx);
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/*
 * ExecSetupChildParentMapForLeaf
 *		NOTE(review): presumably prepares proute's child_parent_tupconv_maps
 *		and child_parent_map_not_required arrays -- confirm against the
 *		definition.
 */
extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute);
|
|
|
|
extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute,
|
|
|
|
ResultRelInfo *rootRelInfo, int leaf_index);
|
|
|
|
extern HeapTuple ConvertPartitionTupleSlot(TupleConversionMap *map,
|
|
|
|
HeapTuple tuple,
|
|
|
|
TupleTableSlot *new_slot,
|
|
|
|
TupleTableSlot **p_my_slot);
|
2018-01-04 21:48:15 +01:00
|
|
|
/*
 * ExecCleanupTupleRouting
 *		NOTE(review): presumably releases the tuple-routing resources held in
 *		'proute' -- exactly what is released is defined in the implementation;
 *		confirm there.
 */
extern void ExecCleanupTupleRouting(PartitionTupleRouting *proute);
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
#endif /* EXECPARTITION_H */
|