2017-11-15 16:23:28 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* execPartition.c
|
|
|
|
* Support routines for partitioning.
|
|
|
|
*
|
2018-01-03 05:30:12 +01:00
|
|
|
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
2017-11-15 16:23:28 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/backend/executor/execPartition.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include "catalog/pg_inherits_fn.h"
|
2018-03-26 15:43:54 +02:00
|
|
|
#include "catalog/pg_type.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
#include "executor/execPartition.h"
|
|
|
|
#include "executor/executor.h"
|
|
|
|
#include "mb/pg_wchar.h"
|
|
|
|
#include "miscadmin.h"
|
2018-03-26 15:43:54 +02:00
|
|
|
#include "nodes/makefuncs.h"
|
2017-11-15 16:23:28 +01:00
|
|
|
#include "utils/lsyscache.h"
|
|
|
|
#include "utils/rls.h"
|
|
|
|
#include "utils/ruleutils.h"
|
|
|
|
|
|
|
|
static PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
|
|
|
|
int *num_parted, List **leaf_part_oids);
|
|
|
|
static void get_partition_dispatch_recurse(Relation rel, Relation parent,
|
|
|
|
List **pds, List **leaf_part_oids);
|
|
|
|
static void FormPartitionKeyDatum(PartitionDispatch pd,
|
|
|
|
TupleTableSlot *slot,
|
|
|
|
EState *estate,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull);
|
|
|
|
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull,
|
|
|
|
int maxfieldlen);
|
2018-03-26 15:43:54 +02:00
|
|
|
static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map);
|
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
2018-01-04 21:48:15 +01:00
|
|
|
* ExecSetupPartitionTupleRouting - sets up information needed during
|
|
|
|
* tuple routing for partitioned tables, encapsulates it in
|
|
|
|
* PartitionTupleRouting, and returns it.
|
2017-11-15 16:23:28 +01:00
|
|
|
*
|
|
|
|
* Note that all the relations in the partition tree are locked using the
|
|
|
|
* RowExclusiveLock mode upon return from this function.
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
*
|
|
|
|
* While we allocate the arrays of pointers of ResultRelInfo and
|
|
|
|
* TupleConversionMap for all partitions here, actual objects themselves are
|
|
|
|
* lazily allocated for a given partition if a tuple is actually routed to it;
|
|
|
|
* see ExecInitPartitionInfo. However, if the function is invoked for update
|
|
|
|
* tuple routing, caller would already have initialized ResultRelInfo's for
|
|
|
|
* some of the partitions, which are reused and assigned to their respective
|
|
|
|
* slot in the aforementioned array.
|
2017-11-15 16:23:28 +01:00
|
|
|
*/
|
2018-01-04 21:48:15 +01:00
|
|
|
PartitionTupleRouting *
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
|
2017-11-15 16:23:28 +01:00
|
|
|
{
|
|
|
|
TupleDesc tupDesc = RelationGetDescr(rel);
|
|
|
|
List *leaf_parts;
|
|
|
|
ListCell *cell;
|
|
|
|
int i;
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
ResultRelInfo *update_rri = NULL;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
int num_update_rri = 0,
|
|
|
|
update_rri_index = 0;
|
MERGE SQL Command following SQL:2016
MERGE performs actions that modify rows in the target table
using a source table or query. MERGE provides a single SQL
statement that can conditionally INSERT/UPDATE/DELETE rows,
a task that would otherwise require multiple PL statements.
e.g.
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE works with regular and partitioned tables, including
column and row security enforcement, as well as support for
row, statement and transition triggers.
MERGE is optimized for OLTP and is parameterizable, though
also useful for large scale ETL/ELT. MERGE is not intended
to be used in preference to existing single SQL commands
for INSERT, UPDATE or DELETE since there is some overhead.
MERGE can be used statically from PL/pgSQL.
MERGE does not yet support inheritance, write rules,
RETURNING clauses, updatable views or foreign tables.
MERGE follows SQL Standard per the most recent SQL:2016.
Includes full tests and documentation, including full
isolation tests to demonstrate the concurrent behavior.
This version written from scratch in 2017 by Simon Riggs,
using docs and tests originally written in 2009. Later work
from Pavan Deolasee has been both complex and deep, leaving
the lead author credit now in his hands.
Extensive discussion of concurrency from Peter Geoghegan,
with thanks for the time and effort contributed.
Various issues reported via sqlsmith by Andreas Seltenreich
Authors: Pavan Deolasee, Simon Riggs
Reviewer: Peter Geoghegan, Amit Langote, Tomas Vondra, Simon Riggs
Discussion:
https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com
https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com
2018-04-03 10:28:16 +02:00
|
|
|
bool is_update = false;
|
|
|
|
bool is_merge = false;
|
2018-01-04 21:48:15 +01:00
|
|
|
PartitionTupleRouting *proute;
|
2018-03-26 15:43:54 +02:00
|
|
|
int nparts;
|
|
|
|
ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the information about the partition tree after locking all the
|
|
|
|
* partitions.
|
|
|
|
*/
|
|
|
|
(void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL);
|
2018-01-04 21:48:15 +01:00
|
|
|
proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
|
|
|
|
proute->partition_dispatch_info =
|
|
|
|
RelationGetPartitionDispatchInfo(rel, &proute->num_dispatch,
|
|
|
|
&leaf_parts);
|
2018-03-26 15:43:54 +02:00
|
|
|
proute->num_partitions = nparts = list_length(leaf_parts);
|
|
|
|
proute->partitions =
|
|
|
|
(ResultRelInfo **) palloc(nparts * sizeof(ResultRelInfo *));
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
proute->parent_child_tupconv_maps =
|
2018-03-26 15:43:54 +02:00
|
|
|
(TupleConversionMap **) palloc0(nparts * sizeof(TupleConversionMap *));
|
|
|
|
proute->partition_oids = (Oid *) palloc(nparts * sizeof(Oid));
|
2017-11-15 16:23:28 +01:00
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/* Set up details specific to the type of tuple routing we are doing. */
|
2018-03-26 15:43:54 +02:00
|
|
|
if (node && node->operation == CMD_UPDATE)
|
MERGE SQL Command following SQL:2016
MERGE performs actions that modify rows in the target table
using a source table or query. MERGE provides a single SQL
statement that can conditionally INSERT/UPDATE/DELETE rows,
a task that would otherwise require multiple PL statements.
e.g.
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE works with regular and partitioned tables, including
column and row security enforcement, as well as support for
row, statement and transition triggers.
MERGE is optimized for OLTP and is parameterizable, though
also useful for large scale ETL/ELT. MERGE is not intended
to be used in preference to existing single SQL commands
for INSERT, UPDATE or DELETE since there is some overhead.
MERGE can be used statically from PL/pgSQL.
MERGE does not yet support inheritance, write rules,
RETURNING clauses, updatable views or foreign tables.
MERGE follows SQL Standard per the most recent SQL:2016.
Includes full tests and documentation, including full
isolation tests to demonstrate the concurrent behavior.
This version written from scratch in 2017 by Simon Riggs,
using docs and tests originally written in 2009. Later work
from Pavan Deolasee has been both complex and deep, leaving
the lead author credit now in his hands.
Extensive discussion of concurrency from Peter Geoghegan,
with thanks for the time and effort contributed.
Various issues reported via sqlsmith by Andreas Seltenreich
Authors: Pavan Deolasee, Simon Riggs
Reviewer: Peter Geoghegan, Amit Langote, Tomas Vondra, Simon Riggs
Discussion:
https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com
https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com
2018-04-03 10:28:16 +02:00
|
|
|
is_update = true;
|
|
|
|
else if (node && node->operation == CMD_MERGE)
|
|
|
|
is_merge = true;
|
|
|
|
|
|
|
|
if (is_update)
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
{
|
|
|
|
update_rri = mtstate->resultRelInfo;
|
|
|
|
num_update_rri = list_length(node->plans);
|
|
|
|
proute->subplan_partition_offsets =
|
|
|
|
palloc(num_update_rri * sizeof(int));
|
2018-01-24 22:34:51 +01:00
|
|
|
proute->num_subplan_partition_offsets = num_update_rri;
|
MERGE SQL Command following SQL:2016
MERGE performs actions that modify rows in the target table
using a source table or query. MERGE provides a single SQL
statement that can conditionally INSERT/UPDATE/DELETE rows,
a task that would otherwise require multiple PL statements.
e.g.
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE works with regular and partitioned tables, including
column and row security enforcement, as well as support for
row, statement and transition triggers.
MERGE is optimized for OLTP and is parameterizable, though
also useful for large scale ETL/ELT. MERGE is not intended
to be used in preference to existing single SQL commands
for INSERT, UPDATE or DELETE since there is some overhead.
MERGE can be used statically from PL/pgSQL.
MERGE does not yet support inheritance, write rules,
RETURNING clauses, updatable views or foreign tables.
MERGE follows SQL Standard per the most recent SQL:2016.
Includes full tests and documentation, including full
isolation tests to demonstrate the concurrent behavior.
This version written from scratch in 2017 by Simon Riggs,
using docs and tests originally written in 2009. Later work
from Pavan Deolasee has been both complex and deep, leaving
the lead author credit now in his hands.
Extensive discussion of concurrency from Peter Geoghegan,
with thanks for the time and effort contributed.
Various issues reported via sqlsmith by Andreas Seltenreich
Authors: Pavan Deolasee, Simon Riggs
Reviewer: Peter Geoghegan, Amit Langote, Tomas Vondra, Simon Riggs
Discussion:
https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com
https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com
2018-04-03 10:28:16 +02:00
|
|
|
}
|
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
MERGE SQL Command following SQL:2016
MERGE performs actions that modify rows in the target table
using a source table or query. MERGE provides a single SQL
statement that can conditionally INSERT/UPDATE/DELETE rows,
a task that would otherwise require multiple PL statements.
e.g.
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE works with regular and partitioned tables, including
column and row security enforcement, as well as support for
row, statement and transition triggers.
MERGE is optimized for OLTP and is parameterizable, though
also useful for large scale ETL/ELT. MERGE is not intended
to be used in preference to existing single SQL commands
for INSERT, UPDATE or DELETE since there is some overhead.
MERGE can be used statically from PL/pgSQL.
MERGE does not yet support inheritance, write rules,
RETURNING clauses, updatable views or foreign tables.
MERGE follows SQL Standard per the most recent SQL:2016.
Includes full tests and documentation, including full
isolation tests to demonstrate the concurrent behavior.
This version written from scratch in 2017 by Simon Riggs,
using docs and tests originally written in 2009. Later work
from Pavan Deolasee has been both complex and deep, leaving
the lead author credit now in his hands.
Extensive discussion of concurrency from Peter Geoghegan,
with thanks for the time and effort contributed.
Various issues reported via sqlsmith by Andreas Seltenreich
Authors: Pavan Deolasee, Simon Riggs
Reviewer: Peter Geoghegan, Amit Langote, Tomas Vondra, Simon Riggs
Discussion:
https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com
https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com
2018-04-03 10:28:16 +02:00
|
|
|
if (is_update || is_merge)
|
|
|
|
{
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/*
|
|
|
|
* We need an additional tuple slot for storing transient tuples that
|
|
|
|
* are converted to the root table descriptor.
|
|
|
|
*/
|
2018-02-17 06:17:38 +01:00
|
|
|
proute->root_tuple_slot = MakeTupleTableSlot(NULL);
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
}
|
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
/*
|
|
|
|
* Initialize an empty slot that will be used to manipulate tuples of any
|
|
|
|
* given partition's rowtype. It is attached to the caller-specified node
|
|
|
|
* (such as ModifyTableState) and released when the node finishes
|
|
|
|
* processing.
|
|
|
|
*/
|
2018-02-17 06:17:38 +01:00
|
|
|
proute->partition_tuple_slot = MakeTupleTableSlot(NULL);
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
i = 0;
|
|
|
|
foreach(cell, leaf_parts)
|
|
|
|
{
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
ResultRelInfo *leaf_part_rri = NULL;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
Oid leaf_oid = lfirst_oid(cell);
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
proute->partition_oids[i] = leaf_oid;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/*
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
* If the leaf partition is already present in the per-subplan result
|
|
|
|
* rels, we re-use that rather than initialize a new result rel. The
|
|
|
|
* per-subplan resultrels and the resultrels of the leaf partitions
|
|
|
|
* are both in the same canonical order. So while going through the
|
|
|
|
* leaf partition oids, we need to keep track of the next per-subplan
|
|
|
|
* result rel to be looked for in the leaf partition resultrels.
|
2017-11-15 16:23:28 +01:00
|
|
|
*/
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
if (update_rri_index < num_update_rri &&
|
|
|
|
RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid)
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
{
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
Relation partrel;
|
|
|
|
TupleDesc part_tupdesc;
|
|
|
|
|
|
|
|
leaf_part_rri = &update_rri[update_rri_index];
|
|
|
|
partrel = leaf_part_rri->ri_RelationDesc;
|
2018-02-08 20:29:05 +01:00
|
|
|
|
|
|
|
/*
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
* This is required in order to convert the partition's tuple to
|
|
|
|
* be compatible with the root partitioned table's tuple
|
|
|
|
* descriptor. When generating the per-subplan result rels, this
|
|
|
|
* was not set.
|
2018-02-08 20:29:05 +01:00
|
|
|
*/
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
leaf_part_rri->ri_PartitionRoot = rel;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/* Remember the subplan offset for this ResultRelInfo */
|
|
|
|
proute->subplan_partition_offsets[update_rri_index] = i;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
update_rri_index++;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
part_tupdesc = RelationGetDescr(partrel);
|
2017-11-15 16:23:28 +01:00
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/*
|
|
|
|
* Save a tuple conversion map to convert a tuple routed to this
|
|
|
|
* partition from the parent's type to the partition's.
|
|
|
|
*/
|
|
|
|
proute->parent_child_tupconv_maps[i] =
|
|
|
|
convert_tuples_by_name(tupDesc, part_tupdesc,
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Verify result relation is a valid target for an INSERT. An
|
|
|
|
* UPDATE of a partition-key becomes a DELETE+INSERT operation, so
|
|
|
|
* this check is required even when the operation is CMD_UPDATE.
|
|
|
|
*/
|
|
|
|
CheckValidResultRel(leaf_part_rri, CMD_INSERT);
|
|
|
|
}
|
2017-11-15 16:23:28 +01:00
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
proute->partitions[i] = leaf_part_rri;
|
2017-11-15 16:23:28 +01:00
|
|
|
i++;
|
|
|
|
}
|
2018-01-04 21:48:15 +01:00
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/*
|
|
|
|
* For UPDATE, we should have found all the per-subplan resultrels in the
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
* leaf partitions. (If this is an INSERT, both values will be zero.)
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
*/
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
Assert(update_rri_index == num_update_rri);
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
return proute;
|
2017-11-15 16:23:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExecFindPartition -- Find a leaf partition in the partition tree rooted
|
|
|
|
* at parent, for the heap tuple contained in *slot
|
|
|
|
*
|
|
|
|
* estate must be non-NULL; we'll need it to compute any expressions in the
|
|
|
|
* partition key(s)
|
|
|
|
*
|
|
|
|
* If no leaf partition is found, this routine errors out with the appropriate
|
|
|
|
* error message, else it returns the leaf partition sequence number
|
|
|
|
* as an index into the array of (ResultRelInfos of) all leaf partitions in
|
|
|
|
* the partition tree.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
|
|
|
|
TupleTableSlot *slot, EState *estate)
|
|
|
|
{
|
|
|
|
int result;
|
|
|
|
Datum values[PARTITION_MAX_KEYS];
|
|
|
|
bool isnull[PARTITION_MAX_KEYS];
|
|
|
|
Relation rel;
|
|
|
|
PartitionDispatch parent;
|
|
|
|
ExprContext *ecxt = GetPerTupleExprContext(estate);
|
|
|
|
TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First check the root table's partition constraint, if any. No point in
|
|
|
|
* routing the tuple if it doesn't belong in the root table itself.
|
|
|
|
*/
|
2018-01-05 21:18:03 +01:00
|
|
|
if (resultRelInfo->ri_PartitionCheck &&
|
|
|
|
!ExecPartitionCheck(resultRelInfo, slot, estate))
|
|
|
|
ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
/* start with the root partitioned table */
|
|
|
|
parent = pd[0];
|
|
|
|
while (true)
|
|
|
|
{
|
2017-11-29 15:24:24 +01:00
|
|
|
PartitionDesc partdesc;
|
2017-11-15 16:23:28 +01:00
|
|
|
TupleTableSlot *myslot = parent->tupslot;
|
|
|
|
TupleConversionMap *map = parent->tupmap;
|
2017-11-29 15:24:24 +01:00
|
|
|
int cur_index = -1;
|
2017-11-15 16:23:28 +01:00
|
|
|
|
|
|
|
rel = parent->reldesc;
|
|
|
|
partdesc = RelationGetPartitionDesc(rel);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert the tuple to this parent's layout so that we can do certain
|
|
|
|
* things we do below.
|
|
|
|
*/
|
|
|
|
if (myslot != NULL && map != NULL)
|
|
|
|
{
|
|
|
|
HeapTuple tuple = ExecFetchSlotTuple(slot);
|
|
|
|
|
|
|
|
ExecClearTuple(myslot);
|
|
|
|
tuple = do_convert_tuple(tuple, map);
|
|
|
|
ExecStoreTuple(tuple, myslot, InvalidBuffer, true);
|
|
|
|
slot = myslot;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Extract partition key from tuple. Expression evaluation machinery
|
|
|
|
* that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
|
|
|
|
* point to the correct tuple slot. The slot might have changed from
|
|
|
|
* what was used for the parent table if the table of the current
|
|
|
|
* partitioning level has different tuple descriptor from the parent.
|
|
|
|
* So update ecxt_scantuple accordingly.
|
|
|
|
*/
|
|
|
|
ecxt->ecxt_scantuple = slot;
|
|
|
|
FormPartitionKeyDatum(parent, slot, estate, values, isnull);
|
2017-12-01 16:01:50 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Nothing for get_partition_for_tuple() to do if there are no
|
|
|
|
* partitions to begin with.
|
|
|
|
*/
|
|
|
|
if (partdesc->nparts == 0)
|
|
|
|
{
|
|
|
|
result = -1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
cur_index = get_partition_for_tuple(rel, values, isnull);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cur_index < 0 means we failed to find a partition of this parent.
|
|
|
|
* cur_index >= 0 means we either found the leaf partition, or the
|
|
|
|
* next parent to find a partition of.
|
|
|
|
*/
|
|
|
|
if (cur_index < 0)
|
|
|
|
{
|
|
|
|
result = -1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (parent->indexes[cur_index] >= 0)
|
|
|
|
{
|
|
|
|
result = parent->indexes[cur_index];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
parent = pd[-parent->indexes[cur_index]];
|
|
|
|
}
|
|
|
|
|
|
|
|
/* A partition was not found. */
|
|
|
|
if (result < 0)
|
|
|
|
{
|
|
|
|
char *val_desc;
|
|
|
|
|
|
|
|
val_desc = ExecBuildSlotPartitionKeyDescription(rel,
|
|
|
|
values, isnull, 64);
|
|
|
|
Assert(OidIsValid(RelationGetRelid(rel)));
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CHECK_VIOLATION),
|
|
|
|
errmsg("no partition of relation \"%s\" found for row",
|
|
|
|
RelationGetRelationName(rel)),
|
|
|
|
val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0));
|
|
|
|
}
|
|
|
|
|
|
|
|
ecxt->ecxt_scantuple = ecxt_scantuple_old;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
MERGE SQL Command following SQL:2016
MERGE performs actions that modify rows in the target table
using a source table or query. MERGE provides a single SQL
statement that can conditionally INSERT/UPDATE/DELETE rows
a task that would otherwise require multiple PL statements.
e.g.
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE works with regular and partitioned tables, including
column and row security enforcement, as well as support for
row, statement and transition triggers.
MERGE is optimized for OLTP and is parameterizable, though
also useful for large scale ETL/ELT. MERGE is not intended
to be used in preference to existing single SQL commands
for INSERT, UPDATE or DELETE since there is some overhead.
MERGE can be used statically from PL/pgSQL.
MERGE does not yet support inheritance, write rules,
RETURNING clauses, updatable views or foreign tables.
MERGE follows SQL Standard per the most recent SQL:2016.
Includes full tests and documentation, including full
isolation tests to demonstrate the concurrent behavior.
This version written from scratch in 2017 by Simon Riggs,
using docs and tests originally written in 2009. Later work
from Pavan Deolasee has been both complex and deep, leaving
the lead author credit now in his hands.
Extensive discussion of concurrency from Peter Geoghegan,
with thanks for the time and effort contributed.
Various issues reported via sqlsmith by Andreas Seltenreich
Authors: Pavan Deolasee, Simon Riggs
Reviewer: Peter Geoghegan, Amit Langote, Tomas Vondra, Simon Riggs
Discussion:
https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com
https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com
2018-04-03 10:28:16 +02:00
|
|
|
/*
|
|
|
|
* Given OID of the partition leaf, return the index of the leaf in the
|
|
|
|
* partition hierarchy.
|
2018-04-05 10:54:07 +02:00
|
|
|
*
|
2018-04-05 14:19:13 +02:00
|
|
|
* XXX This is an O(N) operation and further optimization would be beneficial
|
MERGE SQL Command following SQL:2016
MERGE performs actions that modify rows in the target table
using a source table or query. MERGE provides a single SQL
statement that can conditionally INSERT/UPDATE/DELETE rows
a task that would other require multiple PL statements.
e.g.
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE works with regular and partitioned tables, including
column and row security enforcement, as well as support for
row, statement and transition triggers.
MERGE is optimized for OLTP and is parameterizable, though
also useful for large scale ETL/ELT. MERGE is not intended
to be used in preference to existing single SQL commands
for INSERT, UPDATE or DELETE since there is some overhead.
MERGE can be used statically from PL/pgSQL.
MERGE does not yet support inheritance, write rules,
RETURNING clauses, updatable views or foreign tables.
MERGE follows SQL Standard per the most recent SQL:2016.
Includes full tests and documentation, including full
isolation tests to demonstrate the concurrent behavior.
This version written from scratch in 2017 by Simon Riggs,
using docs and tests originally written in 2009. Later work
from Pavan Deolasee has been both complex and deep, leaving
the lead author credit now in his hands.
Extensive discussion of concurrency from Peter Geoghegan,
with thanks for the time and effort contributed.
Various issues reported via sqlsmith by Andreas Seltenreich
Authors: Pavan Deolasee, Simon Riggs
Reviewer: Peter Geoghegan, Amit Langote, Tomas Vondra, Simon Riggs
Discussion:
https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com
https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com
2018-04-03 10:28:16 +02:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
ExecFindPartitionByOid(PartitionTupleRouting *proute, Oid partoid)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < proute->num_partitions; i++)
|
|
|
|
{
|
|
|
|
if (proute->partition_oids[i] == partoid)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2018-04-05 10:54:07 +02:00
|
|
|
if (i >= proute->num_partitions)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INTERNAL_ERROR),
|
|
|
|
errmsg("no partition found for OID %u", partoid)));
|
MERGE SQL Command following SQL:2016
MERGE performs actions that modify rows in the target table
using a source table or query. MERGE provides a single SQL
statement that can conditionally INSERT/UPDATE/DELETE rows
a task that would other require multiple PL statements.
e.g.
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE works with regular and partitioned tables, including
column and row security enforcement, as well as support for
row, statement and transition triggers.
MERGE is optimized for OLTP and is parameterizable, though
also useful for large scale ETL/ELT. MERGE is not intended
to be used in preference to existing single SQL commands
for INSERT, UPDATE or DELETE since there is some overhead.
MERGE can be used statically from PL/pgSQL.
MERGE does not yet support inheritance, write rules,
RETURNING clauses, updatable views or foreign tables.
MERGE follows SQL Standard per the most recent SQL:2016.
Includes full tests and documentation, including full
isolation tests to demonstrate the concurrent behavior.
This version written from scratch in 2017 by Simon Riggs,
using docs and tests originally written in 2009. Later work
from Pavan Deolasee has been both complex and deep, leaving
the lead author credit now in his hands.
Extensive discussion of concurrency from Peter Geoghegan,
with thanks for the time and effort contributed.
Various issues reported via sqlsmith by Andreas Seltenreich
Authors: Pavan Deolasee, Simon Riggs
Reviewer: Peter Geoghegan, Amit Langote, Tomas Vondra, Simon Riggs
Discussion:
https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com
https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com
2018-04-03 10:28:16 +02:00
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/*
|
|
|
|
* ExecInitPartitionInfo
|
|
|
|
* Initialize ResultRelInfo and other information for a partition if not
|
|
|
|
* already done
|
|
|
|
*
|
|
|
|
* Returns the ResultRelInfo
|
|
|
|
*/
|
|
|
|
ResultRelInfo *
|
|
|
|
ExecInitPartitionInfo(ModifyTableState *mtstate,
|
|
|
|
ResultRelInfo *resultRelInfo,
|
|
|
|
PartitionTupleRouting *proute,
|
|
|
|
EState *estate, int partidx)
|
|
|
|
{
|
|
|
|
Relation rootrel = resultRelInfo->ri_RelationDesc,
|
|
|
|
partrel;
|
|
|
|
ResultRelInfo *leaf_part_rri;
|
|
|
|
ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL;
|
|
|
|
MemoryContext oldContext;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We locked all the partitions in ExecSetupPartitionTupleRouting
|
|
|
|
* including the leaf partitions.
|
|
|
|
*/
|
|
|
|
partrel = heap_open(proute->partition_oids[partidx], NoLock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Keep ResultRelInfo and other information for this partition in the
|
|
|
|
* per-query memory context so they'll survive throughout the query.
|
|
|
|
*/
|
|
|
|
oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
|
|
|
2018-03-26 15:43:54 +02:00
|
|
|
leaf_part_rri = makeNode(ResultRelInfo);
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
InitResultRelInfo(leaf_part_rri,
|
|
|
|
partrel,
|
|
|
|
node ? node->nominalRelation : 1,
|
|
|
|
rootrel,
|
|
|
|
estate->es_instrument);
|
|
|
|
|
MERGE SQL Command following SQL:2016
MERGE performs actions that modify rows in the target table
using a source table or query. MERGE provides a single SQL
statement that can conditionally INSERT/UPDATE/DELETE rows
a task that would otherwise require multiple PL statements.
e.g.
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE works with regular and partitioned tables, including
column and row security enforcement, as well as support for
row, statement and transition triggers.
MERGE is optimized for OLTP and is parameterizable, though
also useful for large scale ETL/ELT. MERGE is not intended
to be used in preference to existing single SQL commands
for INSERT, UPDATE or DELETE since there is some overhead.
MERGE can be used statically from PL/pgSQL.
MERGE does not yet support inheritance, write rules,
RETURNING clauses, updatable views or foreign tables.
MERGE follows SQL Standard per the most recent SQL:2016.
Includes full tests and documentation, including full
isolation tests to demonstrate the concurrent behavior.
This version written from scratch in 2017 by Simon Riggs,
using docs and tests originally written in 2009. Later work
from Pavan Deolasee has been both complex and deep, leaving
the lead author credit now in his hands.
Extensive discussion of concurrency from Peter Geoghegan,
with thanks for the time and effort contributed.
Various issues reported via sqlsmith by Andreas Seltenreich
Authors: Pavan Deolasee, Simon Riggs
Reviewer: Peter Geoghegan, Amit Langote, Tomas Vondra, Simon Riggs
Discussion:
https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com
https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com
2018-04-03 10:28:16 +02:00
|
|
|
leaf_part_rri->ri_PartitionLeafIndex = partidx;
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/*
|
|
|
|
* Verify result relation is a valid target for an INSERT. An UPDATE of a
|
|
|
|
* partition-key becomes a DELETE+INSERT operation, so this check is still
|
|
|
|
* required when the operation is CMD_UPDATE.
|
|
|
|
*/
|
|
|
|
CheckValidResultRel(leaf_part_rri, CMD_INSERT);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Since we've just initialized this ResultRelInfo, it's not in any list
|
|
|
|
* attached to the estate as yet. Add it, so that it can be found later.
|
|
|
|
*
|
|
|
|
* Note that the entries in this list appear in no predetermined order,
|
|
|
|
* because partition result rels are initialized as and when they're
|
|
|
|
* needed.
|
|
|
|
*/
|
|
|
|
estate->es_tuple_routing_result_relations =
|
|
|
|
lappend(estate->es_tuple_routing_result_relations,
|
|
|
|
leaf_part_rri);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Open partition indices. The user may have asked to check for conflicts
|
|
|
|
* within this leaf partition and do "nothing" instead of throwing an
|
|
|
|
* error. Be prepared in that case by initializing the index information
|
|
|
|
* needed by ExecInsert() to perform speculative insertions.
|
|
|
|
*/
|
|
|
|
if (partrel->rd_rel->relhasindex &&
|
|
|
|
leaf_part_rri->ri_IndexRelationDescs == NULL)
|
|
|
|
ExecOpenIndices(leaf_part_rri,
|
2018-03-19 22:09:43 +01:00
|
|
|
(node != NULL &&
|
|
|
|
node->onConflictAction != ONCONFLICT_NONE));
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Build WITH CHECK OPTION constraints for the partition. Note that we
|
|
|
|
* didn't build the withCheckOptionList for partitions within the planner,
|
|
|
|
* but simple translation of varattnos will suffice. This only occurs for
|
|
|
|
* the INSERT case or in the case of UPDATE tuple routing where we didn't
|
|
|
|
* find a result rel to reuse in ExecSetupPartitionTupleRouting().
|
|
|
|
*/
|
|
|
|
if (node && node->withCheckOptionLists != NIL)
|
|
|
|
{
|
|
|
|
List *wcoList;
|
|
|
|
List *wcoExprs = NIL;
|
|
|
|
ListCell *ll;
|
|
|
|
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
|
|
|
|
Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the case of INSERT on a partitioned table, there is only one
|
|
|
|
* plan. Likewise, there is only one WCO list, not one per partition.
|
|
|
|
* For UPDATE, there are as many WCO lists as there are plans.
|
|
|
|
*/
|
|
|
|
Assert((node->operation == CMD_INSERT &&
|
|
|
|
list_length(node->withCheckOptionLists) == 1 &&
|
|
|
|
list_length(node->plans) == 1) ||
|
|
|
|
(node->operation == CMD_UPDATE &&
|
|
|
|
list_length(node->withCheckOptionLists) ==
|
|
|
|
list_length(node->plans)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the WCO list of the first plan as a reference to calculate
|
|
|
|
* attno's for the WCO list of this partition. In the INSERT case,
|
|
|
|
* that refers to the root partitioned table, whereas in the UPDATE
|
|
|
|
* tuple routing case, that refers to the first partition in the
|
|
|
|
* mtstate->resultRelInfo array. In any case, both that relation and
|
|
|
|
* this partition should have the same columns, so we should be able
|
|
|
|
* to map attributes successfully.
|
|
|
|
*/
|
|
|
|
wcoList = linitial(node->withCheckOptionLists);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert Vars in it to contain this partition's attribute numbers.
|
|
|
|
*/
|
|
|
|
wcoList = map_partition_varattnos(wcoList, firstVarno,
|
|
|
|
partrel, firstResultRel, NULL);
|
|
|
|
foreach(ll, wcoList)
|
|
|
|
{
|
|
|
|
WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
|
|
|
|
ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
|
2018-03-06 02:49:59 +01:00
|
|
|
&mtstate->ps);
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
|
|
|
|
wcoExprs = lappend(wcoExprs, wcoExpr);
|
|
|
|
}
|
|
|
|
|
|
|
|
leaf_part_rri->ri_WithCheckOptions = wcoList;
|
|
|
|
leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build the RETURNING projection for the partition. Note that we didn't
|
|
|
|
* build the returningList for partitions within the planner, but simple
|
|
|
|
* translation of varattnos will suffice. This only occurs for the INSERT
|
|
|
|
* case or in the case of UPDATE tuple routing where we didn't find a
|
|
|
|
* result rel to reuse in ExecSetupPartitionTupleRouting().
|
|
|
|
*/
|
|
|
|
if (node && node->returningLists != NIL)
|
|
|
|
{
|
|
|
|
TupleTableSlot *slot;
|
|
|
|
ExprContext *econtext;
|
|
|
|
List *returningList;
|
|
|
|
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
|
|
|
|
Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
|
|
|
|
|
|
|
|
/* See the comment above for WCO lists. */
|
|
|
|
Assert((node->operation == CMD_INSERT &&
|
|
|
|
list_length(node->returningLists) == 1 &&
|
|
|
|
list_length(node->plans) == 1) ||
|
|
|
|
(node->operation == CMD_UPDATE &&
|
|
|
|
list_length(node->returningLists) ==
|
|
|
|
list_length(node->plans)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the RETURNING list of the first plan as a reference to
|
|
|
|
* calculate attno's for the RETURNING list of this partition. See
|
|
|
|
* the comment above for WCO lists for more details on why this is
|
|
|
|
* okay.
|
|
|
|
*/
|
|
|
|
returningList = linitial(node->returningLists);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert Vars in it to contain this partition's attribute numbers.
|
|
|
|
*/
|
|
|
|
returningList = map_partition_varattnos(returningList, firstVarno,
|
|
|
|
partrel, firstResultRel,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize the projection itself.
|
|
|
|
*
|
|
|
|
* Use the slot and the expression context that would have been set up
|
|
|
|
* in ExecInitModifyTable() for projection's output.
|
|
|
|
*/
|
|
|
|
Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
|
|
|
|
slot = mtstate->ps.ps_ResultTupleSlot;
|
|
|
|
Assert(mtstate->ps.ps_ExprContext != NULL);
|
|
|
|
econtext = mtstate->ps.ps_ExprContext;
|
|
|
|
leaf_part_rri->ri_projectReturning =
|
|
|
|
ExecBuildProjectionInfo(returningList, econtext, slot,
|
|
|
|
&mtstate->ps, RelationGetDescr(partrel));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Save a tuple conversion map to convert a tuple routed to this partition
|
|
|
|
* from the parent's type to the partition's.
|
|
|
|
*/
|
|
|
|
proute->parent_child_tupconv_maps[partidx] =
|
|
|
|
convert_tuples_by_name(RelationGetDescr(rootrel),
|
|
|
|
RelationGetDescr(partrel),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
|
2018-03-26 15:43:54 +02:00
|
|
|
/*
|
|
|
|
* If there is an ON CONFLICT clause, initialize state for it.
|
|
|
|
*/
|
|
|
|
if (node && node->onConflictAction != ONCONFLICT_NONE)
|
|
|
|
{
|
|
|
|
TupleConversionMap *map = proute->parent_child_tupconv_maps[partidx];
|
|
|
|
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
|
|
|
|
Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
|
|
|
|
TupleDesc partrelDesc = RelationGetDescr(partrel);
|
|
|
|
ExprContext *econtext = mtstate->ps.ps_ExprContext;
|
|
|
|
ListCell *lc;
|
|
|
|
List *arbiterIndexes = NIL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there is a list of arbiter indexes, map it to a list of indexes
|
|
|
|
* in the partition. We do that by scanning the partition's index
|
|
|
|
* list and searching for ancestry relationships to each index in the
|
|
|
|
* ancestor table.
|
|
|
|
*/
|
|
|
|
if (list_length(resultRelInfo->ri_onConflictArbiterIndexes) > 0)
|
|
|
|
{
|
|
|
|
List *childIdxs;
|
|
|
|
|
|
|
|
childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);
|
|
|
|
|
|
|
|
foreach(lc, childIdxs)
|
|
|
|
{
|
|
|
|
Oid childIdx = lfirst_oid(lc);
|
|
|
|
List *ancestors;
|
|
|
|
ListCell *lc2;
|
|
|
|
|
|
|
|
ancestors = get_partition_ancestors(childIdx);
|
|
|
|
foreach(lc2, resultRelInfo->ri_onConflictArbiterIndexes)
|
|
|
|
{
|
|
|
|
if (list_member_oid(ancestors, lfirst_oid(lc2)))
|
|
|
|
arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
|
|
|
|
}
|
|
|
|
list_free(ancestors);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the resulting lists are of inequal length, something is wrong.
|
|
|
|
* (This shouldn't happen, since arbiter index selection should not
|
|
|
|
* pick up an invalid index.)
|
|
|
|
*/
|
|
|
|
if (list_length(resultRelInfo->ri_onConflictArbiterIndexes) !=
|
|
|
|
list_length(arbiterIndexes))
|
|
|
|
elog(ERROR, "invalid arbiter index list");
|
|
|
|
leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the DO UPDATE case, we have some more state to initialize.
|
|
|
|
*/
|
|
|
|
if (node->onConflictAction == ONCONFLICT_UPDATE)
|
|
|
|
{
|
|
|
|
Assert(node->onConflictSet != NIL);
|
|
|
|
Assert(resultRelInfo->ri_onConflict != NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the partition's tuple descriptor matches exactly the root
|
|
|
|
* parent (the common case), we can simply re-use the parent's ON
|
|
|
|
* CONFLICT SET state, skipping a bunch of work. Otherwise, we
|
|
|
|
* need to create state specific to this partition.
|
|
|
|
*/
|
|
|
|
if (map == NULL)
|
|
|
|
leaf_part_rri->ri_onConflict = resultRelInfo->ri_onConflict;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
List *onconflset;
|
|
|
|
TupleDesc tupDesc;
|
|
|
|
bool found_whole_row;
|
|
|
|
|
|
|
|
leaf_part_rri->ri_onConflict = makeNode(OnConflictSetState);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Translate expressions in onConflictSet to account for
|
|
|
|
* different attribute numbers. For that, map partition
|
|
|
|
* varattnos twice: first to catch the EXCLUDED
|
|
|
|
* pseudo-relation (INNER_VAR), and second to handle the main
|
|
|
|
* target relation (firstVarno).
|
|
|
|
*/
|
|
|
|
onconflset = (List *) copyObject((Node *) node->onConflictSet);
|
|
|
|
onconflset =
|
|
|
|
map_partition_varattnos(onconflset, INNER_VAR, partrel,
|
|
|
|
firstResultRel, &found_whole_row);
|
|
|
|
Assert(!found_whole_row);
|
|
|
|
onconflset =
|
|
|
|
map_partition_varattnos(onconflset, firstVarno, partrel,
|
|
|
|
firstResultRel, &found_whole_row);
|
|
|
|
Assert(!found_whole_row);
|
|
|
|
|
|
|
|
/* Finally, adjust this tlist to match the partition. */
|
|
|
|
onconflset = adjust_partition_tlist(onconflset, map);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build UPDATE SET's projection info. The user of this
|
|
|
|
* projection is responsible for setting the slot's tupdesc!
|
|
|
|
* We set aside a tupdesc that's good for the common case of a
|
|
|
|
* partition that's tupdesc-equal to the partitioned table;
|
|
|
|
* partitions of different tupdescs must generate their own.
|
|
|
|
*/
|
|
|
|
tupDesc = ExecTypeFromTL(onconflset, partrelDesc->tdhasoid);
|
|
|
|
ExecSetSlotDescriptor(mtstate->mt_conflproj, tupDesc);
|
|
|
|
leaf_part_rri->ri_onConflict->oc_ProjInfo =
|
|
|
|
ExecBuildProjectionInfo(onconflset, econtext,
|
|
|
|
mtstate->mt_conflproj,
|
|
|
|
&mtstate->ps, partrelDesc);
|
|
|
|
leaf_part_rri->ri_onConflict->oc_ProjTupdesc = tupDesc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there is a WHERE clause, initialize state where it will
|
|
|
|
* be evaluated, mapping the attribute numbers appropriately.
|
|
|
|
* As with onConflictSet, we need to map partition varattnos
|
|
|
|
* to the partition's tupdesc.
|
|
|
|
*/
|
|
|
|
if (node->onConflictWhere)
|
|
|
|
{
|
|
|
|
List *clause;
|
|
|
|
|
|
|
|
clause = copyObject((List *) node->onConflictWhere);
|
|
|
|
clause = map_partition_varattnos(clause, INNER_VAR,
|
|
|
|
partrel, firstResultRel,
|
|
|
|
&found_whole_row);
|
|
|
|
Assert(!found_whole_row);
|
|
|
|
clause = map_partition_varattnos(clause, firstVarno,
|
|
|
|
partrel, firstResultRel,
|
|
|
|
&found_whole_row);
|
|
|
|
Assert(!found_whole_row);
|
|
|
|
leaf_part_rri->ri_onConflict->oc_WhereClause =
|
|
|
|
ExecInitQual((List *) clause, &mtstate->ps);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Assert(proute->partitions[partidx] == NULL);
|
|
|
|
proute->partitions[partidx] = leaf_part_rri;
|
|
|
|
|
MERGE SQL Command following SQL:2016
MERGE performs actions that modify rows in the target table
using a source table or query. MERGE provides a single SQL
statement that can conditionally INSERT/UPDATE/DELETE rows
a task that would otherwise require multiple PL statements.
e.g.
MERGE INTO target AS t
USING source AS s
ON t.tid = s.sid
WHEN MATCHED AND t.balance > s.delta THEN
UPDATE SET balance = t.balance - s.delta
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED AND s.delta > 0 THEN
INSERT VALUES (s.sid, s.delta)
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE works with regular and partitioned tables, including
column and row security enforcement, as well as support for
row, statement and transition triggers.
MERGE is optimized for OLTP and is parameterizable, though
also useful for large scale ETL/ELT. MERGE is not intended
to be used in preference to existing single SQL commands
for INSERT, UPDATE or DELETE since there is some overhead.
MERGE can be used statically from PL/pgSQL.
MERGE does not yet support inheritance, write rules,
RETURNING clauses, updatable views or foreign tables.
MERGE follows SQL Standard per the most recent SQL:2016.
Includes full tests and documentation, including full
isolation tests to demonstrate the concurrent behavior.
This version written from scratch in 2017 by Simon Riggs,
using docs and tests originally written in 2009. Later work
from Pavan Deolasee has been both complex and deep, leaving
the lead author credit now in his hands.
Extensive discussion of concurrency from Peter Geoghegan,
with thanks for the time and effort contributed.
Various issues reported via sqlsmith by Andreas Seltenreich
Authors: Pavan Deolasee, Simon Riggs
Reviewer: Peter Geoghegan, Amit Langote, Tomas Vondra, Simon Riggs
Discussion:
https://postgr.es/m/CANP8+jKitBSrB7oTgT9CY2i1ObfOt36z0XMraQc+Xrz8QB0nXA@mail.gmail.com
https://postgr.es/m/CAH2-WzkJdBuxj9PO=2QaO9-3h3xGbQPZ34kJH=HukRekwM-GZg@mail.gmail.com
2018-04-03 10:28:16 +02:00
|
|
|
/*
|
|
|
|
* Initialize information about this partition that's needed to handle
|
|
|
|
* MERGE.
|
|
|
|
*/
|
|
|
|
if (node && node->operation == CMD_MERGE)
|
|
|
|
{
|
|
|
|
TupleDesc partrelDesc = RelationGetDescr(partrel);
|
|
|
|
TupleConversionMap *map = proute->parent_child_tupconv_maps[partidx];
|
|
|
|
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
|
|
|
|
Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the root parent and partition have the same tuple
|
|
|
|
* descriptor, just reuse the original MERGE state for partition.
|
|
|
|
*/
|
|
|
|
if (map == NULL)
|
|
|
|
{
|
|
|
|
leaf_part_rri->ri_mergeState = resultRelInfo->ri_mergeState;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Convert expressions to contain the partition's attnos. */
|
|
|
|
List *conv_tl, *conv_qual;
|
|
|
|
ListCell *l;
|
|
|
|
List *matchedActionStates = NIL;
|
|
|
|
List *notMatchedActionStates = NIL;
|
|
|
|
|
|
|
|
foreach (l, node->mergeActionList)
|
|
|
|
{
|
|
|
|
MergeAction *action = lfirst_node(MergeAction, l);
|
|
|
|
MergeActionState *action_state = makeNode(MergeActionState);
|
|
|
|
TupleDesc tupDesc;
|
|
|
|
ExprContext *econtext;
|
|
|
|
|
|
|
|
action_state->matched = action->matched;
|
|
|
|
action_state->commandType = action->commandType;
|
|
|
|
|
|
|
|
conv_qual = (List *) action->qual;
|
|
|
|
conv_qual = map_partition_varattnos(conv_qual,
|
|
|
|
firstVarno, partrel,
|
|
|
|
firstResultRel, NULL);
|
|
|
|
|
|
|
|
action_state->whenqual = ExecInitQual(conv_qual, &mtstate->ps);
|
|
|
|
|
|
|
|
conv_tl = (List *) action->targetList;
|
|
|
|
conv_tl = map_partition_varattnos(conv_tl,
|
|
|
|
firstVarno, partrel,
|
|
|
|
firstResultRel, NULL);
|
|
|
|
|
|
|
|
conv_tl = adjust_partition_tlist( conv_tl, map);
|
|
|
|
|
|
|
|
tupDesc = ExecTypeFromTL(conv_tl, partrelDesc->tdhasoid);
|
|
|
|
action_state->tupDesc = tupDesc;
|
|
|
|
|
|
|
|
/* build action projection state */
|
|
|
|
econtext = mtstate->ps.ps_ExprContext;
|
|
|
|
action_state->proj =
|
|
|
|
ExecBuildProjectionInfo(conv_tl, econtext,
|
|
|
|
mtstate->mt_mergeproj,
|
|
|
|
&mtstate->ps,
|
|
|
|
partrelDesc);
|
|
|
|
|
|
|
|
if (action_state->matched)
|
|
|
|
matchedActionStates =
|
|
|
|
lappend(matchedActionStates, action_state);
|
|
|
|
else
|
|
|
|
notMatchedActionStates =
|
|
|
|
lappend(notMatchedActionStates, action_state);
|
|
|
|
}
|
|
|
|
leaf_part_rri->ri_mergeState->matchedActionStates =
|
|
|
|
matchedActionStates;
|
|
|
|
leaf_part_rri->ri_mergeState->notMatchedActionStates =
|
|
|
|
notMatchedActionStates;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get_partition_dispatch_recurse() and expand_partitioned_rtentry()
|
|
|
|
* fetch the leaf OIDs in the same order. So we can safely derive the
|
|
|
|
* index of the merge target relation corresponding to this partition
|
|
|
|
* by simply adding partidx + 1 to the root's merge target relation.
|
|
|
|
*/
|
|
|
|
leaf_part_rri->ri_mergeTargetRTI = node->mergeTargetRelation +
|
|
|
|
partidx + 1;
|
|
|
|
}
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
MemoryContextSwitchTo(oldContext);
|
|
|
|
|
|
|
|
return leaf_part_rri;
|
|
|
|
}
|
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/*
|
|
|
|
* ExecSetupChildParentMapForLeaf -- Initialize the per-leaf-partition
|
|
|
|
* child-to-root tuple conversion map array.
|
|
|
|
*
|
|
|
|
* This map is required for capturing transition tuples when the target table
|
|
|
|
* is a partitioned table. For a tuple that is routed by an INSERT or UPDATE,
|
|
|
|
* we need to convert it from the leaf partition to the target table
|
|
|
|
* descriptor.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute)
|
|
|
|
{
|
|
|
|
Assert(proute != NULL);
|
|
|
|
|
|
|
|
/*
|
2018-02-06 21:50:13 +01:00
|
|
|
* These array elements get filled up with maps on an on-demand basis.
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
* Initially just set all of them to NULL.
|
|
|
|
*/
|
|
|
|
proute->child_parent_tupconv_maps =
|
|
|
|
(TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) *
|
|
|
|
proute->num_partitions);
|
|
|
|
|
|
|
|
/* Same is the case for this array. All the values are set to false */
|
|
|
|
proute->child_parent_map_not_required =
|
|
|
|
(bool *) palloc0(sizeof(bool) * proute->num_partitions);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* TupConvMapForLeaf -- Get the tuple conversion map for a given leaf partition
|
|
|
|
* index.
|
|
|
|
*/
|
|
|
|
TupleConversionMap *
|
|
|
|
TupConvMapForLeaf(PartitionTupleRouting *proute,
|
|
|
|
ResultRelInfo *rootRelInfo, int leaf_index)
|
|
|
|
{
|
|
|
|
ResultRelInfo **resultRelInfos = proute->partitions;
|
|
|
|
TupleConversionMap **map;
|
|
|
|
TupleDesc tupdesc;
|
|
|
|
|
|
|
|
/* Don't call this if we're not supposed to be using this type of map. */
|
|
|
|
Assert(proute->child_parent_tupconv_maps != NULL);
|
|
|
|
|
|
|
|
/* If it's already known that we don't need a map, return NULL. */
|
|
|
|
if (proute->child_parent_map_not_required[leaf_index])
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* If we've already got a map, return it. */
|
|
|
|
map = &proute->child_parent_tupconv_maps[leaf_index];
|
|
|
|
if (*map != NULL)
|
|
|
|
return *map;
|
|
|
|
|
|
|
|
/* No map yet; try to create one. */
|
|
|
|
tupdesc = RelationGetDescr(resultRelInfos[leaf_index]->ri_RelationDesc);
|
|
|
|
*map =
|
|
|
|
convert_tuples_by_name(tupdesc,
|
|
|
|
RelationGetDescr(rootRelInfo->ri_RelationDesc),
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
|
|
|
|
/* If it turns out no map is needed, remember for next time. */
|
|
|
|
proute->child_parent_map_not_required[leaf_index] = (*map == NULL);
|
|
|
|
|
|
|
|
return *map;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ConvertPartitionTupleSlot -- convenience function for tuple conversion.
|
|
|
|
* The tuple, if converted, is stored in new_slot, and *p_my_slot is
|
|
|
|
* updated to point to it. new_slot typically should be one of the
|
|
|
|
* dedicated partition tuple slots. If map is NULL, *p_my_slot is not changed.
|
|
|
|
*
|
|
|
|
* Returns the converted tuple, unless map is NULL, in which case original
|
|
|
|
* tuple is returned unmodified.
|
|
|
|
*/
|
|
|
|
HeapTuple
|
|
|
|
ConvertPartitionTupleSlot(TupleConversionMap *map,
|
|
|
|
HeapTuple tuple,
|
|
|
|
TupleTableSlot *new_slot,
|
|
|
|
TupleTableSlot **p_my_slot)
|
|
|
|
{
|
|
|
|
if (!map)
|
|
|
|
return tuple;
|
|
|
|
|
|
|
|
tuple = do_convert_tuple(tuple, map);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Change the partition tuple slot descriptor, as per converted tuple.
|
|
|
|
*/
|
|
|
|
*p_my_slot = new_slot;
|
|
|
|
Assert(new_slot != NULL);
|
|
|
|
ExecSetSlotDescriptor(new_slot, map->outdesc);
|
|
|
|
ExecStoreTuple(tuple, new_slot, InvalidBuffer, true);
|
|
|
|
|
|
|
|
return tuple;
|
|
|
|
}
|
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
/*
|
|
|
|
* ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
|
|
|
|
* routing.
|
|
|
|
*
|
|
|
|
* Close all the partitioned tables, leaf partitions, and their indices.
|
|
|
|
*/
|
|
|
|
void
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
ExecCleanupTupleRouting(PartitionTupleRouting *proute)
|
2018-01-04 21:48:15 +01:00
|
|
|
{
|
|
|
|
int i;
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
int subplan_index = 0;
|
2018-01-04 21:48:15 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Remember, proute->partition_dispatch_info[0] corresponds to the root
|
|
|
|
* partitioned table, which we must not try to close, because it is the
|
|
|
|
* main target table of the query that will be closed by callers such as
|
|
|
|
* ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
|
|
|
|
* partitioned table.
|
|
|
|
*/
|
|
|
|
for (i = 1; i < proute->num_dispatch; i++)
|
|
|
|
{
|
|
|
|
PartitionDispatch pd = proute->partition_dispatch_info[i];
|
|
|
|
|
|
|
|
heap_close(pd->reldesc, NoLock);
|
|
|
|
ExecDropSingleTupleTableSlot(pd->tupslot);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < proute->num_partitions; i++)
|
|
|
|
{
|
|
|
|
ResultRelInfo *resultRelInfo = proute->partitions[i];
|
|
|
|
|
Be lazier about partition tuple routing.
It's not necessary to fully initialize the executor data structures
for partitions to which no tuples are ever routed. Consider, for
example, an INSERT statement that inserts only one row: it only cares
about the partition to which that one row is routed. The new function
ExecInitPartitionInfo performs the initialization in question only
when a particular partition is about to receive a tuple. This includes
creating, validating, and saving a pointer to the ResultRelInfo,
setting up for speculative insertions, translating WCOs and
initializing the resulting expressions, translating returning lists
and building the appropriate projection information, and setting up a
tuple conversion map.
One thing that's not deferred is locking the child partitions; that
seems desirable but would need more thought. Still, testing shows
that this makes single-row inserts significantly faster on a table
with many partitions without harming the bulk-insert case.
Amit Langote, reviewed by Etsuro Fujita, with a few changes by me
Discussion: http://postgr.es/m/8975331d-d961-cbdd-f862-fdd3d97dc2d0@lab.ntt.co.jp
2018-02-22 16:55:54 +01:00
|
|
|
/* skip further processsing for uninitialized partitions */
|
|
|
|
if (resultRelInfo == NULL)
|
|
|
|
continue;
|
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/*
|
|
|
|
* If this result rel is one of the UPDATE subplan result rels, let
|
|
|
|
* ExecEndPlan() close it. For INSERT or COPY,
|
|
|
|
* proute->subplan_partition_offsets will always be NULL. Note that
|
|
|
|
* the subplan_partition_offsets array and the partitions array have
|
|
|
|
* the partitions in the same order. So, while we iterate over
|
|
|
|
* partitions array, we also iterate over the
|
|
|
|
* subplan_partition_offsets array in order to figure out which of the
|
|
|
|
* result rels are present in the UPDATE subplans.
|
|
|
|
*/
|
|
|
|
if (proute->subplan_partition_offsets &&
|
2018-01-24 22:34:51 +01:00
|
|
|
subplan_index < proute->num_subplan_partition_offsets &&
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
proute->subplan_partition_offsets[subplan_index] == i)
|
|
|
|
{
|
|
|
|
subplan_index++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-01-04 21:48:15 +01:00
|
|
|
ExecCloseIndices(resultRelInfo);
|
|
|
|
heap_close(resultRelInfo->ri_RelationDesc, NoLock);
|
|
|
|
}
|
|
|
|
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
/* Release the standalone partition tuple descriptors, if any */
|
|
|
|
if (proute->root_tuple_slot)
|
|
|
|
ExecDropSingleTupleTableSlot(proute->root_tuple_slot);
|
2018-01-04 21:48:15 +01:00
|
|
|
if (proute->partition_tuple_slot)
|
|
|
|
ExecDropSingleTupleTableSlot(proute->partition_tuple_slot);
|
|
|
|
}
|
|
|
|
|
2017-11-15 16:23:28 +01:00
|
|
|
/*
|
|
|
|
* RelationGetPartitionDispatchInfo
|
|
|
|
* Returns information necessary to route tuples down a partition tree
|
|
|
|
*
|
|
|
|
* The number of elements in the returned array (that is, the number of
|
|
|
|
* PartitionDispatch objects for the partitioned tables in the partition tree)
|
|
|
|
* is returned in *num_parted and a list of the OIDs of all the leaf
|
|
|
|
* partitions of rel is returned in *leaf_part_oids.
|
|
|
|
*
|
|
|
|
* All the relations in the partition tree (including 'rel') must have been
|
|
|
|
* locked (using at least the AccessShareLock) by the caller.
|
|
|
|
*/
|
|
|
|
static PartitionDispatch *
|
|
|
|
RelationGetPartitionDispatchInfo(Relation rel,
|
|
|
|
int *num_parted, List **leaf_part_oids)
|
|
|
|
{
|
|
|
|
List *pdlist = NIL;
|
|
|
|
PartitionDispatchData **pd;
|
|
|
|
ListCell *lc;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
|
|
|
|
|
|
|
|
*num_parted = 0;
|
|
|
|
*leaf_part_oids = NIL;
|
|
|
|
|
|
|
|
get_partition_dispatch_recurse(rel, NULL, &pdlist, leaf_part_oids);
|
|
|
|
*num_parted = list_length(pdlist);
|
|
|
|
pd = (PartitionDispatchData **) palloc(*num_parted *
|
|
|
|
sizeof(PartitionDispatchData *));
|
|
|
|
i = 0;
|
|
|
|
foreach(lc, pdlist)
|
|
|
|
{
|
|
|
|
pd[i++] = lfirst(lc);
|
|
|
|
}
|
|
|
|
|
|
|
|
return pd;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get_partition_dispatch_recurse
|
|
|
|
* Recursively expand partition tree rooted at rel
|
|
|
|
*
|
|
|
|
* As the partition tree is expanded in a depth-first manner, we maintain two
|
|
|
|
* global lists: of PartitionDispatch objects corresponding to partitioned
|
|
|
|
* tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
|
|
|
|
*
|
|
|
|
* Note that the order of OIDs of leaf partitions in leaf_part_oids matches
|
|
|
|
* the order in which the planner's expand_partitioned_rtentry() processes
|
|
|
|
* them. It's not necessarily the case that the offsets match up exactly,
|
|
|
|
* because constraint exclusion might prune away some partitions on the
|
|
|
|
* planner side, whereas we'll always have the complete list; but unpruned
|
|
|
|
* partitions will appear in the same order in the plan as they are returned
|
|
|
|
* here.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
get_partition_dispatch_recurse(Relation rel, Relation parent,
|
|
|
|
List **pds, List **leaf_part_oids)
|
|
|
|
{
|
|
|
|
TupleDesc tupdesc = RelationGetDescr(rel);
|
|
|
|
PartitionDesc partdesc = RelationGetPartitionDesc(rel);
|
|
|
|
PartitionKey partkey = RelationGetPartitionKey(rel);
|
|
|
|
PartitionDispatch pd;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
check_stack_depth();
|
|
|
|
|
|
|
|
/* Build a PartitionDispatch for this table and add it to *pds. */
|
|
|
|
pd = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
|
|
|
|
*pds = lappend(*pds, pd);
|
|
|
|
pd->reldesc = rel;
|
|
|
|
pd->key = partkey;
|
|
|
|
pd->keystate = NIL;
|
|
|
|
pd->partdesc = partdesc;
|
|
|
|
if (parent != NULL)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* For every partitioned table other than the root, we must store a
|
|
|
|
* tuple table slot initialized with its tuple descriptor and a tuple
|
|
|
|
* conversion map to convert a tuple from its parent's rowtype to its
|
|
|
|
* own. That is to make sure that we are looking at the correct row
|
|
|
|
* using the correct tuple descriptor when computing its partition key
|
|
|
|
* for tuple routing.
|
|
|
|
*/
|
|
|
|
pd->tupslot = MakeSingleTupleTableSlot(tupdesc);
|
|
|
|
pd->tupmap = convert_tuples_by_name(RelationGetDescr(parent),
|
|
|
|
tupdesc,
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Not required for the root partitioned table */
|
|
|
|
pd->tupslot = NULL;
|
|
|
|
pd->tupmap = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go look at each partition of this table. If it's a leaf partition,
|
|
|
|
* simply add its OID to *leaf_part_oids. If it's a partitioned table,
|
|
|
|
* recursively call get_partition_dispatch_recurse(), so that its
|
|
|
|
* partitions are processed as well and a corresponding PartitionDispatch
|
|
|
|
* object gets added to *pds.
|
|
|
|
*
|
|
|
|
* About the values in pd->indexes: for a leaf partition, it contains the
|
|
|
|
* leaf partition's position in the global list *leaf_part_oids minus 1,
|
|
|
|
* whereas for a partitioned table partition, it contains the partition's
|
|
|
|
* position in the global list *pds multiplied by -1. The latter is
|
|
|
|
* multiplied by -1 to distinguish partitioned tables from leaf partitions
|
|
|
|
* when going through the values in pd->indexes. So, for example, when
|
|
|
|
* using it during tuple-routing, encountering a value >= 0 means we found
|
|
|
|
* a leaf partition. It is immediately returned as the index in the array
|
|
|
|
* of ResultRelInfos of all the leaf partitions, using which we insert the
|
|
|
|
* tuple into that leaf partition. A negative value means we found a
|
|
|
|
* partitioned table. The value multiplied by -1 is returned as the index
|
|
|
|
* in the array of PartitionDispatch objects of all partitioned tables in
|
|
|
|
* the tree. This value is used to continue the search in the next level
|
|
|
|
* of the partition tree.
|
|
|
|
*/
|
|
|
|
pd->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
|
|
|
|
for (i = 0; i < partdesc->nparts; i++)
|
|
|
|
{
|
|
|
|
Oid partrelid = partdesc->oids[i];
|
|
|
|
|
|
|
|
if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
|
|
|
|
{
|
|
|
|
*leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
|
|
|
|
pd->indexes[i] = list_length(*leaf_part_oids) - 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We assume all tables in the partition tree were already locked
|
|
|
|
* by the caller.
|
|
|
|
*/
|
|
|
|
Relation partrel = heap_open(partrelid, NoLock);
|
|
|
|
|
|
|
|
pd->indexes[i] = -list_length(*pds);
|
|
|
|
get_partition_dispatch_recurse(partrel, rel, pds, leaf_part_oids);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------------
|
|
|
|
* FormPartitionKeyDatum
|
|
|
|
* Construct values[] and isnull[] arrays for the partition key
|
|
|
|
* of a tuple.
|
|
|
|
*
|
|
|
|
* pd Partition dispatch object of the partitioned table
|
|
|
|
* slot Heap tuple from which to extract partition key
|
|
|
|
* estate executor state for evaluating any partition key
|
|
|
|
* expressions (must be non-NULL)
|
|
|
|
* values Array of partition key Datums (output area)
|
|
|
|
* isnull Array of is-null indicators (output area)
|
|
|
|
*
|
|
|
|
* the ecxt_scantuple slot of estate's per-tuple expr context must point to
|
|
|
|
* the heap tuple passed in.
|
|
|
|
* ----------------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
FormPartitionKeyDatum(PartitionDispatch pd,
|
|
|
|
TupleTableSlot *slot,
|
|
|
|
EState *estate,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull)
|
|
|
|
{
|
|
|
|
ListCell *partexpr_item;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (pd->key->partexprs != NIL && pd->keystate == NIL)
|
|
|
|
{
|
|
|
|
/* Check caller has set up context correctly */
|
|
|
|
Assert(estate != NULL &&
|
|
|
|
GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
|
|
|
|
|
|
|
|
/* First time through, set up expression evaluation state */
|
|
|
|
pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
|
|
|
|
}
|
|
|
|
|
|
|
|
partexpr_item = list_head(pd->keystate);
|
|
|
|
for (i = 0; i < pd->key->partnatts; i++)
|
|
|
|
{
|
|
|
|
AttrNumber keycol = pd->key->partattrs[i];
|
|
|
|
Datum datum;
|
|
|
|
bool isNull;
|
|
|
|
|
|
|
|
if (keycol != 0)
|
|
|
|
{
|
|
|
|
/* Plain column; get the value directly from the heap tuple */
|
|
|
|
datum = slot_getattr(slot, keycol, &isNull);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Expression; need to evaluate it */
|
|
|
|
if (partexpr_item == NULL)
|
|
|
|
elog(ERROR, "wrong number of partition key expressions");
|
|
|
|
datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
|
|
|
|
GetPerTupleExprContext(estate),
|
|
|
|
&isNull);
|
|
|
|
partexpr_item = lnext(partexpr_item);
|
|
|
|
}
|
|
|
|
values[i] = datum;
|
|
|
|
isnull[i] = isNull;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (partexpr_item != NULL)
|
|
|
|
elog(ERROR, "wrong number of partition key expressions");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2017-11-28 20:17:21 +01:00
|
|
|
* ExecBuildSlotPartitionKeyDescription
|
2017-11-15 16:23:28 +01:00
|
|
|
*
|
|
|
|
* This works very much like BuildIndexValueDescription() and is currently
|
|
|
|
* used for building error messages when ExecFindPartition() fails to find
|
|
|
|
* partition for a row.
|
|
|
|
*/
|
|
|
|
static char *
|
|
|
|
ExecBuildSlotPartitionKeyDescription(Relation rel,
|
|
|
|
Datum *values,
|
|
|
|
bool *isnull,
|
|
|
|
int maxfieldlen)
|
|
|
|
{
|
|
|
|
StringInfoData buf;
|
|
|
|
PartitionKey key = RelationGetPartitionKey(rel);
|
|
|
|
int partnatts = get_partition_natts(key);
|
|
|
|
int i;
|
|
|
|
Oid relid = RelationGetRelid(rel);
|
|
|
|
AclResult aclresult;
|
|
|
|
|
|
|
|
if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* If the user has table-level access, just go build the description. */
|
|
|
|
aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
|
|
|
|
if (aclresult != ACLCHECK_OK)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Step through the columns of the partition key and make sure the
|
|
|
|
* user has SELECT rights on all of them.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < partnatts; i++)
|
|
|
|
{
|
|
|
|
AttrNumber attnum = get_partition_col_attnum(key, i);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this partition key column is an expression, we return no
|
|
|
|
* detail rather than try to figure out what column(s) the
|
|
|
|
* expression includes and if the user has SELECT rights on them.
|
|
|
|
*/
|
|
|
|
if (attnum == InvalidAttrNumber ||
|
|
|
|
pg_attribute_aclcheck(relid, attnum, GetUserId(),
|
|
|
|
ACL_SELECT) != ACLCHECK_OK)
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
initStringInfo(&buf);
|
|
|
|
appendStringInfo(&buf, "(%s) = (",
|
|
|
|
pg_get_partkeydef_columns(relid, true));
|
|
|
|
|
|
|
|
for (i = 0; i < partnatts; i++)
|
|
|
|
{
|
|
|
|
char *val;
|
|
|
|
int vallen;
|
|
|
|
|
|
|
|
if (isnull[i])
|
|
|
|
val = "null";
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Oid foutoid;
|
|
|
|
bool typisvarlena;
|
|
|
|
|
|
|
|
getTypeOutputInfo(get_partition_col_typid(key, i),
|
|
|
|
&foutoid, &typisvarlena);
|
|
|
|
val = OidOutputFunctionCall(foutoid, values[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i > 0)
|
|
|
|
appendStringInfoString(&buf, ", ");
|
|
|
|
|
|
|
|
/* truncate if needed */
|
|
|
|
vallen = strlen(val);
|
|
|
|
if (vallen <= maxfieldlen)
|
|
|
|
appendStringInfoString(&buf, val);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
vallen = pg_mbcliplen(val, vallen, maxfieldlen);
|
|
|
|
appendBinaryStringInfo(&buf, val, vallen);
|
|
|
|
appendStringInfoString(&buf, "...");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
appendStringInfoChar(&buf, ')');
|
|
|
|
|
|
|
|
return buf.data;
|
|
|
|
}
|
2018-03-26 15:43:54 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* adjust_partition_tlist
|
|
|
|
* Adjust the targetlist entries for a given partition to account for
|
|
|
|
* attribute differences between parent and the partition
|
|
|
|
*
|
|
|
|
* The expressions have already been fixed, but here we fix the list to make
|
|
|
|
* target resnos match the partition's attribute numbers. This results in a
|
|
|
|
* copy of the original target list in which the entries appear in resno
|
|
|
|
* order, including both the existing entries (that may have their resno
|
|
|
|
* changed in-place) and the newly added entries for columns that don't exist
|
|
|
|
* in the parent.
|
|
|
|
*
|
|
|
|
* Scribbles on the input tlist, so callers must make sure to make a copy
|
|
|
|
* before passing it to us.
|
|
|
|
*/
|
|
|
|
static List *
|
|
|
|
adjust_partition_tlist(List *tlist, TupleConversionMap *map)
|
|
|
|
{
|
|
|
|
List *new_tlist = NIL;
|
|
|
|
TupleDesc tupdesc = map->outdesc;
|
|
|
|
AttrNumber *attrMap = map->attrMap;
|
|
|
|
AttrNumber attrno;
|
|
|
|
|
|
|
|
for (attrno = 1; attrno <= tupdesc->natts; attrno++)
|
|
|
|
{
|
|
|
|
Form_pg_attribute att_tup = TupleDescAttr(tupdesc, attrno - 1);
|
|
|
|
TargetEntry *tle;
|
|
|
|
|
|
|
|
if (attrMap[attrno - 1] != InvalidAttrNumber)
|
|
|
|
{
|
|
|
|
Assert(!att_tup->attisdropped);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the corresponding entry from the parent's tlist, adjusting
|
|
|
|
* the resno the match the partition's attno.
|
|
|
|
*/
|
|
|
|
tle = (TargetEntry *) list_nth(tlist, attrMap[attrno - 1] - 1);
|
|
|
|
tle->resno = attrno;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Const *expr;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For a dropped attribute in the partition, generate a dummy
|
|
|
|
* entry with resno matching the partition's attno.
|
|
|
|
*/
|
|
|
|
Assert(att_tup->attisdropped);
|
|
|
|
expr = makeConst(INT4OID,
|
|
|
|
-1,
|
|
|
|
InvalidOid,
|
|
|
|
sizeof(int32),
|
|
|
|
(Datum) 0,
|
|
|
|
true, /* isnull */
|
|
|
|
true /* byval */ );
|
|
|
|
tle = makeTargetEntry((Expr *) expr,
|
|
|
|
attrno,
|
|
|
|
pstrdup(NameStr(att_tup->attname)),
|
|
|
|
false);
|
|
|
|
}
|
|
|
|
|
|
|
|
new_tlist = lappend(new_tlist, tle);
|
|
|
|
}
|
|
|
|
|
|
|
|
return new_tlist;
|
|
|
|
}
|