1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* execMain.c
|
1997-09-07 07:04:48 +02:00
|
|
|
* top level executor interface routines
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* INTERFACE ROUTINES
|
1997-09-07 07:04:48 +02:00
|
|
|
* ExecutorStart()
|
|
|
|
* ExecutorRun()
|
2011-02-27 19:43:29 +01:00
|
|
|
* ExecutorFinish()
|
1997-09-07 07:04:48 +02:00
|
|
|
* ExecutorEnd()
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2011-02-27 19:43:29 +01:00
|
|
|
* These four procedures are the external interface to the executor.
|
2002-12-05 16:50:39 +01:00
|
|
|
* In each case, the query descriptor is required as an argument.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
2011-02-27 19:43:29 +01:00
|
|
|
* ExecutorStart must be called at the beginning of execution of any
|
|
|
|
* query plan and ExecutorEnd must always be called at the end of
|
|
|
|
* execution of a plan (unless it is aborted due to error).
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
2002-02-27 20:36:13 +01:00
|
|
|
* ExecutorRun accepts direction and count arguments that specify whether
|
1997-09-07 07:04:48 +02:00
|
|
|
* the plan is to be executed forwards, backwards, and for how many tuples.
|
2011-02-27 19:43:29 +01:00
|
|
|
* In some cases ExecutorRun may be called multiple times to process all
|
2014-05-06 18:12:18 +02:00
|
|
|
* the tuples for a plan. It is also acceptable to stop short of executing
|
2011-02-27 19:43:29 +01:00
|
|
|
* the whole plan (but only if it is a SELECT).
|
|
|
|
*
|
|
|
|
* ExecutorFinish must be called after the final ExecutorRun call and
|
|
|
|
* before ExecutorEnd. This can be omitted only in case of EXPLAIN,
|
|
|
|
* which should also omit ExecutorRun.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2018-01-03 05:30:12 +01:00
|
|
|
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/executor/execMain.c
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
1996-10-31 11:12:26 +01:00
|
|
|
#include "postgres.h"
|
|
|
|
|
2012-08-30 22:15:44 +02:00
|
|
|
#include "access/htup_details.h"
|
2009-01-22 21:16:10 +01:00
|
|
|
#include "access/sysattr.h"
|
2006-07-13 18:49:20 +02:00
|
|
|
#include "access/transam.h"
|
|
|
|
#include "access/xact.h"
|
2002-03-26 20:17:02 +01:00
|
|
|
#include "catalog/namespace.h"
|
2017-01-19 18:00:00 +01:00
|
|
|
#include "catalog/pg_publication.h"
|
2013-07-16 19:55:44 +02:00
|
|
|
#include "commands/matview.h"
|
1997-09-01 10:01:46 +02:00
|
|
|
#include "commands/trigger.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "executor/execdebug.h"
|
2013-03-10 19:14:53 +01:00
|
|
|
#include "foreign/fdwapi.h"
|
2011-11-29 21:02:10 +01:00
|
|
|
#include "mb/pg_wchar.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "miscadmin.h"
|
2003-12-28 22:57:37 +01:00
|
|
|
#include "optimizer/clauses.h"
|
2006-07-11 18:35:33 +02:00
|
|
|
#include "parser/parsetree.h"
|
2016-12-21 17:36:10 +01:00
|
|
|
#include "rewrite/rewriteManip.h"
|
2008-05-12 02:00:54 +02:00
|
|
|
#include "storage/bufmgr.h"
|
|
|
|
#include "storage/lmgr.h"
|
2010-02-20 22:24:02 +01:00
|
|
|
#include "tcop/utility.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "utils/acl.h"
|
2002-03-22 00:27:25 +01:00
|
|
|
#include "utils/lsyscache.h"
|
2005-05-06 19:24:55 +02:00
|
|
|
#include "utils/memutils.h"
|
2018-04-15 02:12:14 +02:00
|
|
|
#include "utils/partcache.h"
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
#include "utils/rls.h"
|
2017-03-03 04:37:41 +01:00
|
|
|
#include "utils/ruleutils.h"
|
2008-05-12 22:02:02 +02:00
|
|
|
#include "utils/snapmgr.h"
|
2008-03-26 22:10:39 +01:00
|
|
|
#include "utils/tqual.h"
|
1996-11-06 07:52:23 +01:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2011-02-27 19:43:29 +01:00
|
|
|
/* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
|
2009-06-11 16:49:15 +02:00
|
|
|
ExecutorStart_hook_type ExecutorStart_hook = NULL;
|
|
|
|
ExecutorRun_hook_type ExecutorRun_hook = NULL;
|
2011-02-27 19:43:29 +01:00
|
|
|
ExecutorFinish_hook_type ExecutorFinish_hook = NULL;
|
2009-06-11 16:49:15 +02:00
|
|
|
ExecutorEnd_hook_type ExecutorEnd_hook = NULL;
|
2008-07-18 20:23:47 +02:00
|
|
|
|
2010-07-09 16:06:01 +02:00
|
|
|
/* Hook for plugin to get control in ExecCheckRTPerms() */
|
|
|
|
ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL;
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/* decls for local routines only used within this module */
|
2006-02-28 05:10:28 +01:00
|
|
|
static void InitPlan(QueryDesc *queryDesc, int eflags);
|
2011-06-02 20:46:15 +02:00
|
|
|
static void CheckValidRowMarkRel(Relation rel, RowMarkType markType);
|
2011-02-26 00:56:23 +01:00
|
|
|
static void ExecPostprocessPlan(EState *estate);
|
2007-02-27 02:11:26 +01:00
|
|
|
static void ExecEndPlan(PlanState *planstate, EState *estate);
|
2008-10-31 22:07:55 +01:00
|
|
|
static void ExecutePlan(EState *estate, PlanState *planstate,
|
2015-10-16 17:56:02 +02:00
|
|
|
bool use_parallel_mode,
|
2001-03-22 05:01:46 +01:00
|
|
|
CmdType operation,
|
2009-10-10 03:43:50 +02:00
|
|
|
bool sendTuples,
|
Widen query numbers-of-tuples-processed counters to uint64.
This patch widens SPI_processed, EState's es_processed field, PortalData's
portalPos field, FuncCallContext's call_cntr and max_calls fields,
ExecutorRun's count argument, PortalRunFetch's result, and the max number
of rows in a SPITupleTable to uint64, and deals with (I hope) all the
ensuing fallout. Some of these values were declared uint32 before, and
others "long".
I also removed PortalData's posOverflow field, since that logic seems
pretty useless given that portalPos is now always 64 bits.
The user-visible results are that command tags for SELECT etc will
correctly report tuple counts larger than 4G, as will plpgsql's
GET DIAGNOSTICS ... ROW_COUNT command. Queries processing more tuples
than that are still not exactly the norm, but they're becoming more
common.
Most values associated with FETCH/MOVE distances, such as PortalRun's count
argument and the count argument of most SPI functions that have one, remain
declared as "long". It's not clear whether it would be worth promoting
those to int64; but it would definitely be a large dollop of additional
API churn on top of this, and it would only help 32-bit platforms which
seem relatively less likely to see any benefit.
Andreas Scherbaum, reviewed by Christian Ullrich, additional hacking by me
2016-03-12 22:05:10 +01:00
|
|
|
uint64 numberTuples,
|
2001-03-22 05:01:46 +01:00
|
|
|
ScanDirection direction,
|
2017-03-23 18:05:48 +01:00
|
|
|
DestReceiver *dest,
|
|
|
|
bool execute_once);
|
2010-07-22 02:47:59 +02:00
|
|
|
static bool ExecCheckRTEPerms(RangeTblEntry *rte);
|
2015-05-08 00:20:46 +02:00
|
|
|
static bool ExecCheckRTEPermsModified(Oid relOid, Oid userid,
|
|
|
|
Bitmapset *modifiedCols,
|
|
|
|
AclMode requiredPerms);
|
2007-11-15 23:25:18 +01:00
|
|
|
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
static char *ExecBuildSlotValueDescription(Oid reloid,
|
|
|
|
TupleTableSlot *slot,
|
2013-11-07 20:41:36 +01:00
|
|
|
TupleDesc tupdesc,
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
Bitmapset *modifiedCols,
|
2012-06-10 21:20:04 +02:00
|
|
|
int maxfieldlen);
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
|
2010-02-26 03:01:40 +01:00
|
|
|
Plan *planTree);
|
2000-04-12 19:17:23 +02:00
|
|
|
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
/*
|
2015-05-08 00:20:46 +02:00
|
|
|
* Note that GetUpdatedColumns() also exists in commands/trigger.c. There does
|
|
|
|
* not appear to be any good header to put it into, given the structures that
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
* it uses, so we let them be duplicated. Be sure to update both if one needs
|
|
|
|
* to be changed, however.
|
|
|
|
*/
|
2015-05-08 00:20:46 +02:00
|
|
|
#define GetInsertedColumns(relinfo, estate) \
|
|
|
|
(rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->insertedCols)
|
|
|
|
#define GetUpdatedColumns(relinfo, estate) \
|
|
|
|
(rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->updatedCols)
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/* end of local decls */
|
|
|
|
|
2000-03-09 06:15:33 +01:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/* ----------------------------------------------------------------
|
1997-09-07 07:04:48 +02:00
|
|
|
* ExecutorStart
|
|
|
|
*
|
|
|
|
* This routine must be called at the beginning of any execution of any
|
|
|
|
* query plan
|
|
|
|
*
|
2011-02-27 19:43:29 +01:00
|
|
|
* Takes a QueryDesc previously created by CreateQueryDesc (which is separate
|
|
|
|
* only because some places use QueryDescs for utility commands). The tupDesc
|
2002-12-05 16:50:39 +01:00
|
|
|
* field of the QueryDesc is filled in to describe the tuples that will be
|
|
|
|
* returned, and the internal fields (estate and planstate) are set up.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2006-02-28 05:10:28 +01:00
|
|
|
* eflags contains flag bits as described in executor.h.
|
2003-05-06 02:20:33 +02:00
|
|
|
*
|
2002-12-15 17:17:59 +01:00
|
|
|
* NB: the CurrentMemoryContext when this is called will become the parent
|
|
|
|
* of the per-query context used for this Executor invocation.
|
2008-11-19 02:10:24 +01:00
|
|
|
*
|
|
|
|
* We provide a function hook variable that lets loadable plugins
|
|
|
|
* get control when ExecutorStart is called. Such a plugin would
|
|
|
|
* normally call standard_ExecutorStart().
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
2002-12-05 16:50:39 +01:00
|
|
|
void
|
2006-02-28 05:10:28 +01:00
|
|
|
ExecutorStart(QueryDesc *queryDesc, int eflags)
|
2008-11-19 02:10:24 +01:00
|
|
|
{
|
|
|
|
if (ExecutorStart_hook)
|
|
|
|
(*ExecutorStart_hook) (queryDesc, eflags);
|
|
|
|
else
|
|
|
|
standard_ExecutorStart(queryDesc, eflags);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-12-05 16:50:39 +01:00
|
|
|
EState *estate;
|
2002-12-15 17:17:59 +01:00
|
|
|
MemoryContext oldcontext;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-12-05 16:50:39 +01:00
|
|
|
/* sanity checks: queryDesc must not be started already */
|
1997-09-07 07:04:48 +02:00
|
|
|
Assert(queryDesc != NULL);
|
2002-12-05 16:50:39 +01:00
|
|
|
Assert(queryDesc->estate == NULL);
|
|
|
|
|
2003-05-06 02:20:33 +02:00
|
|
|
/*
|
2003-08-04 02:43:34 +02:00
|
|
|
* If the transaction is read-only, we need to check if any writes are
|
2006-02-28 05:10:28 +01:00
|
|
|
* planned to non-temporary tables. EXPLAIN is considered read-only.
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 21:02:14 +02:00
|
|
|
*
|
2015-05-24 03:35:49 +02:00
|
|
|
* Don't allow writes in parallel mode. Supporting UPDATE and DELETE
|
|
|
|
* would require (a) storing the combocid hash in shared memory, rather
|
|
|
|
* than synchronizing it just once at the start of parallelism, and (b) an
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 21:02:14 +02:00
|
|
|
* alternative to heap_update()'s reliance on xmax for mutual exclusion.
|
|
|
|
* INSERT may have no such troubles, but we forbid it to simplify the
|
|
|
|
* checks.
|
|
|
|
*
|
|
|
|
* We have lower-level defenses in CommandCounterIncrement and elsewhere
|
2015-05-24 03:35:49 +02:00
|
|
|
* against performing unsafe operations in parallel mode, but this gives a
|
|
|
|
* more user-friendly error message.
|
2003-05-06 02:20:33 +02:00
|
|
|
*/
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 21:02:14 +02:00
|
|
|
if ((XactReadOnly || IsInParallelMode()) &&
|
|
|
|
!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
|
2007-02-20 18:32:18 +01:00
|
|
|
ExecCheckXactReadOnly(queryDesc->plannedstmt);
|
2003-05-06 02:20:33 +02:00
|
|
|
|
2002-12-05 16:50:39 +01:00
|
|
|
/*
|
2002-12-15 17:17:59 +01:00
|
|
|
* Build EState, switch into per-query memory context for startup.
|
2002-12-05 16:50:39 +01:00
|
|
|
*/
|
|
|
|
estate = CreateExecutorState();
|
|
|
|
queryDesc->estate = estate;
|
|
|
|
|
2002-12-15 17:17:59 +01:00
|
|
|
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
|
|
|
|
|
|
/*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* Fill in external parameters, if any, from queryDesc; and allocate
|
|
|
|
* workspace for internal parameters
|
2002-12-15 17:17:59 +01:00
|
|
|
*/
|
2002-12-05 16:50:39 +01:00
|
|
|
estate->es_param_list_info = queryDesc->params;
|
1998-02-26 05:46:47 +01:00
|
|
|
|
2017-11-13 21:24:12 +01:00
|
|
|
if (queryDesc->plannedstmt->paramExecTypes != NIL)
|
|
|
|
{
|
|
|
|
int nParamExec;
|
|
|
|
|
|
|
|
nParamExec = list_length(queryDesc->plannedstmt->paramExecTypes);
|
1998-02-26 05:46:47 +01:00
|
|
|
estate->es_param_exec_vals = (ParamExecData *)
|
2017-11-13 21:24:12 +01:00
|
|
|
palloc0(nParamExec * sizeof(ParamExecData));
|
|
|
|
}
|
1998-09-01 06:40:42 +02:00
|
|
|
|
2017-02-22 07:45:17 +01:00
|
|
|
estate->es_sourceText = queryDesc->sourceText;
|
|
|
|
|
2017-04-01 06:17:18 +02:00
|
|
|
/*
|
|
|
|
* Fill in the query environment, if any, from queryDesc.
|
|
|
|
*/
|
|
|
|
estate->es_queryEnv = queryDesc->queryEnv;
|
|
|
|
|
2007-11-30 22:22:54 +01:00
|
|
|
/*
|
|
|
|
* If non-read-only query, set the command ID to mark output tuples with
|
|
|
|
*/
|
|
|
|
switch (queryDesc->operation)
|
|
|
|
{
|
|
|
|
case CMD_SELECT:
|
2011-04-10 17:42:00 +02:00
|
|
|
|
2011-02-26 00:56:23 +01:00
|
|
|
/*
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
* SELECT FOR [KEY] UPDATE/SHARE and modifying CTEs need to mark
|
|
|
|
* tuples
|
2011-02-26 00:56:23 +01:00
|
|
|
*/
|
Restructure SELECT INTO's parsetree representation into CreateTableAsStmt.
Making this operation look like a utility statement seems generally a good
idea, and particularly so in light of the desire to provide command
triggers for utility statements. The original choice of representing it as
SELECT with an IntoClause appendage had metastasized into rather a lot of
places, unfortunately, so that this patch is a great deal more complicated
than one might at first expect.
In particular, keeping EXPLAIN working for SELECT INTO and CREATE TABLE AS
subcommands required restructuring some EXPLAIN-related APIs. Add-on code
that calls ExplainOnePlan or ExplainOneUtility, or uses
ExplainOneQuery_hook, will need adjustment.
Also, the cases PREPARE ... SELECT INTO and CREATE RULE ... SELECT INTO,
which formerly were accepted though undocumented, are no longer accepted.
The PREPARE case can be replaced with use of CREATE TABLE AS EXECUTE.
The CREATE RULE case doesn't seem to have much real-world use (since the
rule would work only once before failing with "table already exists"),
so we'll not bother with that one.
Both SELECT INTO and CREATE TABLE AS still return a command tag of
"SELECT nnnn". There was some discussion of returning "CREATE TABLE nnnn",
but for the moment backwards compatibility wins the day.
Andres Freund and Tom Lane
2012-03-20 02:37:19 +01:00
|
|
|
if (queryDesc->plannedstmt->rowMarks != NIL ||
|
2011-02-26 00:56:23 +01:00
|
|
|
queryDesc->plannedstmt->hasModifyingCTE)
|
2007-11-30 22:22:54 +01:00
|
|
|
estate->es_output_cid = GetCurrentCommandId(true);
|
2011-02-27 19:43:29 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* A SELECT without modifying CTEs can't possibly queue triggers,
|
|
|
|
* so force skip-triggers mode. This is just a marginal efficiency
|
|
|
|
* hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't
|
|
|
|
* all that expensive, but we might as well do it.
|
|
|
|
*/
|
|
|
|
if (!queryDesc->plannedstmt->hasModifyingCTE)
|
|
|
|
eflags |= EXEC_FLAG_SKIP_TRIGGERS;
|
2007-11-30 22:22:54 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case CMD_INSERT:
|
|
|
|
case CMD_DELETE:
|
|
|
|
case CMD_UPDATE:
|
|
|
|
estate->es_output_cid = GetCurrentCommandId(true);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
elog(ERROR, "unrecognized operation code: %d",
|
|
|
|
(int) queryDesc->operation);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
1999-02-07 14:37:56 +01:00
|
|
|
/*
|
2004-09-13 22:10:13 +02:00
|
|
|
* Copy other important information into the EState
|
1999-02-07 14:37:56 +01:00
|
|
|
*/
|
2008-05-12 22:02:02 +02:00
|
|
|
estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
|
|
|
|
estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
|
2011-02-27 19:43:29 +01:00
|
|
|
estate->es_top_eflags = eflags;
|
2009-12-15 05:57:48 +01:00
|
|
|
estate->es_instrument = queryDesc->instrument_options;
|
2018-03-25 20:54:16 +02:00
|
|
|
estate->es_jit_flags = queryDesc->plannedstmt->jitFlags;
|
2018-03-22 19:45:07 +01:00
|
|
|
|
2011-02-27 19:43:29 +01:00
|
|
|
/*
|
|
|
|
* Set up an AFTER-trigger statement context, unless told not to, or
|
|
|
|
* unless it's EXPLAIN-only mode (when ExecutorFinish won't be called).
|
|
|
|
*/
|
|
|
|
if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
|
|
|
|
AfterTriggerBeginQuery();
|
|
|
|
|
Fix SQL-spec incompatibilities in new transition table feature.
The standard says that all changes of the same kind (insert, update, or
delete) caused in one table by a single SQL statement should be reported
in a single transition table; and by that, they mean to include foreign key
enforcement actions cascading from the statement's direct effects. It's
also reasonable to conclude that if the standard had wCTEs, they would say
that effects of wCTEs applying to the same table as each other or the outer
statement should be merged into one transition table. We weren't doing it
like that.
Hence, arrange to merge tuples from multiple update actions into a single
transition table as much as we can. There is a problem, which is that if
the firing of FK enforcement triggers and after-row triggers with
transition tables is interspersed, we might need to report more tuples
after some triggers have already seen the transition table. It seems like
a bad idea for the transition table to be mutable between trigger calls.
There's no good way around this without a major redesign of the FK logic,
so for now, resolve it by opening a new transition table each time this
happens.
Also, ensure that AFTER STATEMENT triggers fire just once per statement,
or once per transition table when we're forced to make more than one.
Previous versions of Postgres have allowed each FK enforcement query
to cause an additional firing of the AFTER STATEMENT triggers for the
referencing table, but that's certainly not per spec. (We're still
doing multiple firings of BEFORE STATEMENT triggers, though; is that
something worth changing?)
Also, forbid using transition tables with column-specific UPDATE triggers.
The spec requires such transition tables to show only the tuples for which
the UPDATE trigger would have fired, which means maintaining multiple
transition tables or else somehow filtering the contents at readout.
Maybe someday we'll bother to support that option, but it looks like a
lot of trouble for a marginal feature.
The transition tables are now managed by the AfterTriggers data structures,
rather than being directly the responsibility of ModifyTable nodes. This
removes a subtransaction-lifespan memory leak introduced by my previous
band-aid patch 3c4359521.
In passing, refactor the AfterTriggers data structures to reduce the
management overhead for them, by using arrays of structs rather than
several parallel arrays for per-query-level and per-subtransaction state.
I failed to resist the temptation to do some copy-editing on the SGML
docs about triggers, above and beyond merely documenting the effects
of this patch.
Back-patch to v10, because we don't want the semantics of transition
tables to change post-release.
Patch by me, with help and review from Thomas Munro.
Discussion: https://postgr.es/m/20170909064853.25630.12825@wrigleys.postgresql.org
2017-09-16 19:20:32 +02:00
|
|
|
/*
|
|
|
|
* Initialize the plan state tree
|
|
|
|
*/
|
|
|
|
InitPlan(queryDesc, eflags);
|
|
|
|
|
2002-12-15 17:17:59 +01:00
|
|
|
MemoryContextSwitchTo(oldcontext);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
|
1997-09-07 07:04:48 +02:00
|
|
|
* ExecutorRun
|
|
|
|
*
|
|
|
|
* This is the main routine of the executor module. It accepts
|
|
|
|
* the query descriptor from the traffic cop and executes the
|
|
|
|
* query plan.
|
|
|
|
*
|
|
|
|
* ExecutorStart must have been called already.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2002-02-27 20:36:13 +01:00
|
|
|
* If direction is NoMovementScanDirection then nothing is done
|
|
|
|
* except to start up/shut down the destination. Otherwise,
|
|
|
|
* we retrieve up to 'count' tuples in the specified direction.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2003-01-09 00:32:29 +01:00
|
|
|
* Note: count = 0 is interpreted as no portal limit, i.e., run to
|
2013-01-25 00:34:00 +01:00
|
|
|
* completion. Also note that the count limit is only applied to
|
|
|
|
* retrieved tuples, not for instance to those inserted/updated/deleted
|
|
|
|
* by a ModifyTable plan node.
|
2000-10-26 23:38:24 +02:00
|
|
|
*
|
2008-10-31 22:07:55 +01:00
|
|
|
* There is no return value, but output tuples (if any) are sent to
|
|
|
|
* the destination receiver specified in the QueryDesc; and the number
|
|
|
|
* of tuples processed at the top level can be found in
|
|
|
|
* estate->es_processed.
|
|
|
|
*
|
2008-07-18 20:23:47 +02:00
|
|
|
* We provide a function hook variable that lets loadable plugins
|
|
|
|
* get control when ExecutorRun is called. Such a plugin would
|
|
|
|
* normally call standard_ExecutorRun().
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
2008-10-31 22:07:55 +01:00
|
|
|
void
|
2002-12-05 16:50:39 +01:00
|
|
|
ExecutorRun(QueryDesc *queryDesc,
|
2017-03-23 18:05:48 +01:00
|
|
|
ScanDirection direction, uint64 count,
|
|
|
|
bool execute_once)
|
2008-07-18 20:23:47 +02:00
|
|
|
{
|
|
|
|
if (ExecutorRun_hook)
|
2017-03-23 18:05:48 +01:00
|
|
|
(*ExecutorRun_hook) (queryDesc, direction, count, execute_once);
|
2008-07-18 20:23:47 +02:00
|
|
|
else
|
2017-03-23 18:05:48 +01:00
|
|
|
standard_ExecutorRun(queryDesc, direction, count, execute_once);
|
2008-07-18 20:23:47 +02:00
|
|
|
}
|
|
|
|
|
2008-10-31 22:07:55 +01:00
|
|
|
void
|
2008-07-18 20:23:47 +02:00
|
|
|
standard_ExecutorRun(QueryDesc *queryDesc,
|
2017-03-23 18:05:48 +01:00
|
|
|
ScanDirection direction, uint64 count, bool execute_once)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-12-05 16:50:39 +01:00
|
|
|
EState *estate;
|
2002-12-15 17:17:59 +01:00
|
|
|
CmdType operation;
|
2003-05-06 22:26:28 +02:00
|
|
|
DestReceiver *dest;
|
2006-08-12 04:52:06 +02:00
|
|
|
bool sendTuples;
|
2002-12-15 17:17:59 +01:00
|
|
|
MemoryContext oldcontext;
|
|
|
|
|
|
|
|
/* sanity checks */
|
|
|
|
Assert(queryDesc != NULL);
|
|
|
|
|
|
|
|
estate = queryDesc->estate;
|
|
|
|
|
|
|
|
Assert(estate != NULL);
|
2011-02-27 19:43:29 +01:00
|
|
|
Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2002-12-15 17:17:59 +01:00
|
|
|
* Switch into per-query memory context
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2002-12-15 17:17:59 +01:00
|
|
|
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2011-02-27 19:43:29 +01:00
|
|
|
/* Allow instrumentation of Executor overall runtime */
|
2008-11-19 02:10:24 +01:00
|
|
|
if (queryDesc->totaltime)
|
|
|
|
InstrStartNode(queryDesc->totaltime);
|
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* extract information from the query descriptor and the query feature.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
operation = queryDesc->operation;
|
|
|
|
dest = queryDesc->dest;
|
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2006-08-12 04:52:06 +02:00
|
|
|
* startup tuple receiver, if we will be emitting tuples
|
1999-01-29 12:56:01 +01:00
|
|
|
*/
|
2002-02-27 20:36:13 +01:00
|
|
|
estate->es_processed = 0;
|
|
|
|
estate->es_lastoid = InvalidOid;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2006-08-12 04:52:06 +02:00
|
|
|
sendTuples = (operation == CMD_SELECT ||
|
2009-10-10 03:43:50 +02:00
|
|
|
queryDesc->plannedstmt->hasReturning);
|
2006-08-12 04:52:06 +02:00
|
|
|
|
|
|
|
if (sendTuples)
|
2017-09-07 18:06:23 +02:00
|
|
|
dest->rStartup(dest, operation, queryDesc->tupDesc);
|
2000-10-26 23:38:24 +02:00
|
|
|
|
2002-02-27 20:36:13 +01:00
|
|
|
/*
|
|
|
|
* run plan
|
|
|
|
*/
|
2008-10-31 22:07:55 +01:00
|
|
|
if (!ScanDirectionIsNoMovement(direction))
|
2017-03-23 18:05:48 +01:00
|
|
|
{
|
|
|
|
if (execute_once && queryDesc->already_executed)
|
|
|
|
elog(ERROR, "can't re-execute query flagged for single execution");
|
|
|
|
queryDesc->already_executed = true;
|
|
|
|
|
2008-10-31 22:07:55 +01:00
|
|
|
ExecutePlan(estate,
|
|
|
|
queryDesc->planstate,
|
2015-10-16 17:56:02 +02:00
|
|
|
queryDesc->plannedstmt->parallelModeNeeded,
|
2008-10-31 22:07:55 +01:00
|
|
|
operation,
|
2009-10-10 03:43:50 +02:00
|
|
|
sendTuples,
|
2008-10-31 22:07:55 +01:00
|
|
|
count,
|
|
|
|
direction,
|
2017-03-23 18:05:48 +01:00
|
|
|
dest,
|
|
|
|
execute_once);
|
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-02-27 20:36:13 +01:00
|
|
|
/*
|
2006-08-12 04:52:06 +02:00
|
|
|
* shutdown tuple receiver, if we started it
|
2002-02-27 20:36:13 +01:00
|
|
|
*/
|
2006-08-12 04:52:06 +02:00
|
|
|
if (sendTuples)
|
2017-09-07 18:06:23 +02:00
|
|
|
dest->rShutdown(dest);
|
1999-01-29 12:56:01 +01:00
|
|
|
|
2008-11-19 02:10:24 +01:00
|
|
|
if (queryDesc->totaltime)
|
|
|
|
InstrStopNode(queryDesc->totaltime, estate->es_processed);
|
|
|
|
|
2002-12-15 17:17:59 +01:00
|
|
|
MemoryContextSwitchTo(oldcontext);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2011-02-27 19:43:29 +01:00
|
|
|
/* ----------------------------------------------------------------
|
|
|
|
* ExecutorFinish
|
|
|
|
*
|
|
|
|
* This routine must be called after the last ExecutorRun call.
|
2014-05-06 18:12:18 +02:00
|
|
|
* It performs cleanup such as firing AFTER triggers. It is
|
2011-02-27 19:43:29 +01:00
|
|
|
* separate from ExecutorEnd because EXPLAIN ANALYZE needs to
|
|
|
|
* include these actions in the total runtime.
|
|
|
|
*
|
|
|
|
* We provide a function hook variable that lets loadable plugins
|
2014-05-06 18:12:18 +02:00
|
|
|
* get control when ExecutorFinish is called. Such a plugin would
|
2011-02-27 19:43:29 +01:00
|
|
|
* normally call standard_ExecutorFinish().
|
|
|
|
*
|
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ExecutorFinish(QueryDesc *queryDesc)
|
|
|
|
{
|
|
|
|
if (ExecutorFinish_hook)
|
|
|
|
(*ExecutorFinish_hook) (queryDesc);
|
|
|
|
else
|
|
|
|
standard_ExecutorFinish(queryDesc);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
standard_ExecutorFinish(QueryDesc *queryDesc)
|
|
|
|
{
|
|
|
|
EState *estate;
|
|
|
|
MemoryContext oldcontext;
|
|
|
|
|
|
|
|
/* sanity checks */
|
|
|
|
Assert(queryDesc != NULL);
|
|
|
|
|
|
|
|
estate = queryDesc->estate;
|
|
|
|
|
|
|
|
Assert(estate != NULL);
|
|
|
|
Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
|
|
|
|
|
|
|
|
/* This should be run once and only once per Executor instance */
|
|
|
|
Assert(!estate->es_finished);
|
|
|
|
|
|
|
|
/* Switch into per-query memory context */
|
|
|
|
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
|
|
|
|
|
|
/* Allow instrumentation of Executor overall runtime */
|
|
|
|
if (queryDesc->totaltime)
|
|
|
|
InstrStartNode(queryDesc->totaltime);
|
|
|
|
|
|
|
|
/* Run ModifyTable nodes to completion */
|
|
|
|
ExecPostprocessPlan(estate);
|
|
|
|
|
|
|
|
/* Execute queued AFTER triggers, unless told not to */
|
|
|
|
if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
|
|
|
|
AfterTriggerEndQuery(estate);
|
|
|
|
|
|
|
|
if (queryDesc->totaltime)
|
|
|
|
InstrStopNode(queryDesc->totaltime, 0);
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
|
|
|
|
estate->es_finished = true;
|
|
|
|
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/* ----------------------------------------------------------------
|
1997-09-07 07:04:48 +02:00
|
|
|
* ExecutorEnd
|
|
|
|
*
|
2001-01-29 01:39:20 +01:00
|
|
|
* This routine must be called at the end of execution of any
|
1997-09-07 07:04:48 +02:00
|
|
|
* query plan
|
2008-11-19 02:10:24 +01:00
|
|
|
*
|
|
|
|
* We provide a function hook variable that lets loadable plugins
|
|
|
|
* get control when ExecutorEnd is called. Such a plugin would
|
2011-02-27 19:43:29 +01:00
|
|
|
* normally call standard_ExecutorEnd().
|
2008-11-19 02:10:24 +01:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
void
|
2002-12-05 16:50:39 +01:00
|
|
|
ExecutorEnd(QueryDesc *queryDesc)
|
2008-11-19 02:10:24 +01:00
|
|
|
{
|
|
|
|
if (ExecutorEnd_hook)
|
|
|
|
(*ExecutorEnd_hook) (queryDesc);
|
|
|
|
else
|
|
|
|
standard_ExecutorEnd(queryDesc);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
standard_ExecutorEnd(QueryDesc *queryDesc)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-12-05 16:50:39 +01:00
|
|
|
EState *estate;
|
2002-12-15 17:17:59 +01:00
|
|
|
MemoryContext oldcontext;
|
2002-12-05 16:50:39 +01:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/* sanity checks */
|
|
|
|
Assert(queryDesc != NULL);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2002-12-05 16:50:39 +01:00
|
|
|
estate = queryDesc->estate;
|
|
|
|
|
2002-12-15 17:17:59 +01:00
|
|
|
Assert(estate != NULL);
|
2002-12-05 16:50:39 +01:00
|
|
|
|
2011-02-27 19:43:29 +01:00
|
|
|
/*
|
2011-04-10 17:42:00 +02:00
|
|
|
* Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This
|
|
|
|
* Assert is needed because ExecutorFinish is new as of 9.1, and callers
|
|
|
|
* might forget to call it.
|
2011-02-27 19:43:29 +01:00
|
|
|
*/
|
|
|
|
Assert(estate->es_finished ||
|
|
|
|
(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
|
|
|
|
|
2000-03-09 06:15:33 +01:00
|
|
|
/*
|
2002-12-15 17:17:59 +01:00
|
|
|
* Switch into per-query memory context to run ExecEndPlan
|
2000-03-09 06:15:33 +01:00
|
|
|
*/
|
2002-12-15 17:17:59 +01:00
|
|
|
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
|
|
|
|
|
|
ExecEndPlan(queryDesc->planstate, estate);
|
1998-02-26 05:46:47 +01:00
|
|
|
|
2008-05-12 22:02:02 +02:00
|
|
|
/* do away with our snapshots */
|
|
|
|
UnregisterSnapshot(estate->es_snapshot);
|
|
|
|
UnregisterSnapshot(estate->es_crosscheck_snapshot);
|
|
|
|
|
2000-03-09 06:15:33 +01:00
|
|
|
/*
|
2002-12-15 17:17:59 +01:00
|
|
|
* Must switch out of context before destroying it
|
2000-03-09 06:15:33 +01:00
|
|
|
*/
|
2002-12-15 17:17:59 +01:00
|
|
|
MemoryContextSwitchTo(oldcontext);
|
2000-03-09 06:15:33 +01:00
|
|
|
|
2002-12-05 16:50:39 +01:00
|
|
|
/*
|
2002-12-15 17:17:59 +01:00
|
|
|
* Release EState and per-query memory context. This should release
|
|
|
|
* everything the executor has allocated.
|
2002-12-05 16:50:39 +01:00
|
|
|
*/
|
2002-12-15 17:17:59 +01:00
|
|
|
FreeExecutorState(estate);
|
|
|
|
|
|
|
|
/* Reset queryDesc fields that no longer point to anything */
|
|
|
|
queryDesc->tupDesc = NULL;
|
|
|
|
queryDesc->estate = NULL;
|
|
|
|
queryDesc->planstate = NULL;
|
2008-11-19 02:10:24 +01:00
|
|
|
queryDesc->totaltime = NULL;
|
2000-03-09 06:15:33 +01:00
|
|
|
}
|
1999-01-25 13:01:19 +01:00
|
|
|
|
2003-03-11 20:40:24 +01:00
|
|
|
/* ----------------------------------------------------------------
|
|
|
|
* ExecutorRewind
|
|
|
|
*
|
|
|
|
* This routine may be called on an open queryDesc to rewind it
|
|
|
|
* to the start.
|
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ExecutorRewind(QueryDesc *queryDesc)
|
|
|
|
{
|
|
|
|
EState *estate;
|
|
|
|
MemoryContext oldcontext;
|
|
|
|
|
|
|
|
/* sanity checks */
|
|
|
|
Assert(queryDesc != NULL);
|
|
|
|
|
|
|
|
estate = queryDesc->estate;
|
|
|
|
|
|
|
|
Assert(estate != NULL);
|
|
|
|
|
|
|
|
/* It's probably not sensible to rescan updating queries */
|
|
|
|
Assert(queryDesc->operation == CMD_SELECT);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Switch into per-query memory context
|
|
|
|
*/
|
|
|
|
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* rescan plan
|
|
|
|
*/
|
2010-07-12 19:01:06 +02:00
|
|
|
ExecReScan(queryDesc->planstate);
|
2003-03-11 20:40:24 +01:00
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
}
|
|
|
|
|
2002-12-05 16:50:39 +01:00
|
|
|
|
2000-03-09 06:15:33 +01:00
|
|
|
/*
|
|
|
|
* ExecCheckRTPerms
|
|
|
|
* Check access permissions for all relations listed in a range table.
|
2010-07-22 02:47:59 +02:00
|
|
|
*
|
|
|
|
* Returns true if permissions are adequate. Otherwise, throws an appropriate
|
|
|
|
* error if ereport_on_violation is true, or simply returns false otherwise.
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
*
|
Rename pg_rowsecurity -> pg_policy and other fixes
As pointed out by Robert, we should really have named pg_rowsecurity
pg_policy, as the objects stored in that catalog are policies. This
patch fixes that and updates the column names to start with 'pol' to
match the new catalog name.
The security consideration for COPY with row level security, also
pointed out by Robert, has also been addressed by remembering and
re-checking the OID of the relation initially referenced during COPY
processing, to make sure it hasn't changed under us by the time we
finish planning out the query which has been built.
Robert and Alvaro also commented on missing OCLASS and OBJECT entries
for POLICY (formerly ROWSECURITY or POLICY, depending) in various
places. This patch fixes that too, which also happens to add the
ability to COMMENT on policies.
In passing, attempt to improve the consistency of messages, comments,
and documentation as well. This removes various incarnations of
'row-security', 'row-level security', 'Row-security', etc, in favor
of 'policy', 'row level security' or 'row_security' as appropriate.
Happy Thanksgiving!
2014-11-27 07:06:36 +01:00
|
|
|
* Note that this does NOT address row level security policies (aka: RLS). If
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
* rows will be returned to the user as a result of this permission check
|
|
|
|
* passing, then RLS also needs to be consulted (and check_enable_rls()).
|
|
|
|
*
|
|
|
|
* See rewrite/rowsecurity.c.
|
2000-03-09 06:15:33 +01:00
|
|
|
*/
|
2010-07-22 02:47:59 +02:00
|
|
|
bool
|
|
|
|
ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation)
|
2000-03-09 06:15:33 +01:00
|
|
|
{
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
2010-07-22 02:47:59 +02:00
|
|
|
bool result = true;
|
2000-03-09 06:15:33 +01:00
|
|
|
|
2004-05-26 06:41:50 +02:00
|
|
|
foreach(l, rangeTable)
|
1999-01-25 13:01:19 +01:00
|
|
|
{
|
2011-04-10 17:42:00 +02:00
|
|
|
RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
|
2010-07-22 02:47:59 +02:00
|
|
|
|
|
|
|
result = ExecCheckRTEPerms(rte);
|
|
|
|
if (!result)
|
|
|
|
{
|
|
|
|
Assert(rte->rtekind == RTE_RELATION);
|
|
|
|
if (ereport_on_violation)
|
2017-12-02 15:26:34 +01:00
|
|
|
aclcheck_error(ACLCHECK_NO_PRIV, get_relkind_objtype(get_rel_relkind(rte->relid)),
|
2010-07-22 02:47:59 +02:00
|
|
|
get_rel_name(rte->relid));
|
|
|
|
return false;
|
|
|
|
}
|
2000-03-09 06:15:33 +01:00
|
|
|
}
|
2010-07-09 16:06:01 +02:00
|
|
|
|
|
|
|
if (ExecutorCheckPerms_hook)
|
2011-04-10 17:42:00 +02:00
|
|
|
result = (*ExecutorCheckPerms_hook) (rangeTable,
|
|
|
|
ereport_on_violation);
|
2010-07-22 02:47:59 +02:00
|
|
|
return result;
|
2000-03-09 06:15:33 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExecCheckRTEPerms
|
|
|
|
* Check access permissions for a single RTE.
|
|
|
|
*/
|
2010-07-22 02:47:59 +02:00
|
|
|
static bool
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;	/* all privilege bits this RTE requires */
	AclMode		relPerms;		/* bits granted at relation level */
	AclMode		remainingPerms; /* bits still needed from column level */
	Oid			relOid;
	Oid			userid;

	/*
	 * Only plain-relation RTEs need to be checked here.  Function RTEs are
	 * checked when the function is prepared for execution.  Join, subquery,
	 * and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return true;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return true;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.  If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, but some of the bits can be
	 * satisfied from column-level rather than relation-level permissions.
	 * First, remove any bits that are satisfied by relation permissions.
	 */
	relPerms = pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL);
	remainingPerms = requiredPerms & ~relPerms;
	if (remainingPerms != 0)
	{
		int			col = -1;	/* bitmapset iterator state */

		/*
		 * If we lack any permissions that exist only as relation permissions,
		 * we can fail straight away.
		 */
		if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE))
			return false;

		/*
		 * Check to see if we have the needed privileges at column level.
		 *
		 * Note: failures just report a table-level error; it would be nicer
		 * to report a column-level error if we have some but not all of the
		 * column privileges.
		 */
		if (remainingPerms & ACL_SELECT)
		{
			/*
			 * When the query doesn't explicitly reference any columns (for
			 * example, SELECT COUNT(*) FROM table), allow the query if we
			 * have SELECT on any column of the rel, as per SQL spec.
			 */
			if (bms_is_empty(rte->selectedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			/*
			 * Otherwise, require SELECT on each individually referenced
			 * column.  (The loop is a no-op when selectedCols is empty.)
			 */
			while ((col = bms_next_member(rte->selectedCols, col)) >= 0)
			{
				/* bit #s are offset by FirstLowInvalidHeapAttributeNumber */
				AttrNumber	attno = col + FirstLowInvalidHeapAttributeNumber;

				if (attno == InvalidAttrNumber)
				{
					/* Whole-row reference, must have priv on all cols */
					if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
												  ACLMASK_ALL) != ACLCHECK_OK)
						return false;
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, attno, userid,
											  ACL_SELECT) != ACLCHECK_OK)
						return false;
				}
			}
		}

		/*
		 * Basically the same for the mod columns, for both INSERT and UPDATE
		 * privilege as specified by remainingPerms.  (Note that & binds
		 * tighter than &&, so each condition tests a single privilege bit.)
		 */
		if (remainingPerms & ACL_INSERT && !ExecCheckRTEPermsModified(relOid,
																	  userid,
																	  rte->insertedCols,
																	  ACL_INSERT))
			return false;

		if (remainingPerms & ACL_UPDATE && !ExecCheckRTEPermsModified(relOid,
																	  userid,
																	  rte->updatedCols,
																	  ACL_UPDATE))
			return false;
	}
	/* every required privilege bit was satisfied at some level */
	return true;
}
|
2014-11-28 19:37:25 +01:00
|
|
|
|
2015-05-08 00:20:46 +02:00
|
|
|
/*
|
|
|
|
* ExecCheckRTEPermsModified
|
|
|
|
* Check INSERT or UPDATE access permissions for a single RTE (these
|
|
|
|
* are processed uniformly).
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
ExecCheckRTEPermsModified(Oid relOid, Oid userid, Bitmapset *modifiedCols,
|
|
|
|
AclMode requiredPerms)
|
|
|
|
{
|
|
|
|
int col = -1;
|
|
|
|
|
|
|
|
/*
|
2015-05-24 03:35:49 +02:00
|
|
|
* When the query doesn't explicitly update any columns, allow the query
|
|
|
|
* if we have permission on any column of the rel. This is to handle
|
|
|
|
* SELECT FOR UPDATE as well as possible corner cases in UPDATE.
|
2015-05-08 00:20:46 +02:00
|
|
|
*/
|
|
|
|
if (bms_is_empty(modifiedCols))
|
|
|
|
{
|
|
|
|
if (pg_attribute_aclcheck_all(relOid, userid, requiredPerms,
|
|
|
|
ACLMASK_ANY) != ACLCHECK_OK)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
while ((col = bms_next_member(modifiedCols, col)) >= 0)
|
|
|
|
{
|
|
|
|
/* bit #s are offset by FirstLowInvalidHeapAttributeNumber */
|
|
|
|
AttrNumber attno = col + FirstLowInvalidHeapAttributeNumber;
|
|
|
|
|
|
|
|
if (attno == InvalidAttrNumber)
|
|
|
|
{
|
|
|
|
/* whole-row reference can't happen here */
|
|
|
|
elog(ERROR, "whole-row update is not implemented");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (pg_attribute_aclcheck(relOid, attno, userid,
|
|
|
|
requiredPerms) != ACLCHECK_OK)
|
|
|
|
return false;
|
2009-01-22 21:16:10 +01:00
|
|
|
}
|
|
|
|
}
|
2010-07-22 02:47:59 +02:00
|
|
|
return true;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2004-01-15 00:01:55 +01:00
|
|
|
/*
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 21:02:14 +02:00
|
|
|
* Check that the query does not imply any writes to non-temp tables;
|
|
|
|
* unless we're in parallel mode, in which case don't even allow writes
|
|
|
|
* to temp tables.
|
2010-02-20 22:24:02 +01:00
|
|
|
*
|
2017-08-07 23:42:47 +02:00
|
|
|
* Note: in a Hot Standby this would need to reject writes to temp
|
|
|
|
* tables just as we do in parallel mode; but an HS standby can't have created
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 21:02:14 +02:00
|
|
|
* any temp tables in the first place, so no need to check that.
|
2004-01-15 00:01:55 +01:00
|
|
|
*/
|
2003-01-10 23:03:30 +01:00
|
|
|
static void
|
2007-11-15 23:25:18 +01:00
|
|
|
ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
|
2003-01-10 23:03:30 +01:00
|
|
|
{
|
2007-02-22 23:00:26 +01:00
|
|
|
ListCell *l;
|
|
|
|
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 21:02:14 +02:00
|
|
|
/*
|
2015-05-24 03:35:49 +02:00
|
|
|
* Fail if write permissions are requested in parallel mode for table
|
|
|
|
* (temp or non-temp), otherwise fail for any non-temp table.
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 21:02:14 +02:00
|
|
|
*/
|
2007-02-22 23:00:26 +01:00
|
|
|
foreach(l, plannedstmt->rtable)
|
2003-01-10 23:03:30 +01:00
|
|
|
{
|
2007-02-22 23:00:26 +01:00
|
|
|
RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
|
2003-01-10 23:03:30 +01:00
|
|
|
|
2004-01-15 00:01:55 +01:00
|
|
|
if (rte->rtekind != RTE_RELATION)
|
|
|
|
continue;
|
2003-01-10 23:03:30 +01:00
|
|
|
|
2004-01-15 00:01:55 +01:00
|
|
|
if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
|
|
|
|
continue;
|
2003-01-10 23:03:30 +01:00
|
|
|
|
2004-01-15 00:01:55 +01:00
|
|
|
if (isTempNamespace(get_rel_namespace(rte->relid)))
|
|
|
|
continue;
|
2003-01-10 23:03:30 +01:00
|
|
|
|
2010-02-20 22:24:02 +01:00
|
|
|
PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
|
2003-01-10 23:03:30 +01:00
|
|
|
}
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 21:02:14 +02:00
|
|
|
|
|
|
|
if (plannedstmt->commandType != CMD_SELECT || plannedstmt->hasModifyingCTE)
|
|
|
|
PreventCommandIfParallelMode(CreateCommandTag((Node *) plannedstmt));
|
2003-01-10 23:03:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/* ----------------------------------------------------------------
|
1997-09-07 07:04:48 +02:00
|
|
|
* InitPlan
|
|
|
|
*
|
|
|
|
* Initializes the query plan: open files, allocate storage
|
|
|
|
* and start up the rule manager
|
1996-07-09 08:22:35 +02:00
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
2002-12-05 16:50:39 +01:00
|
|
|
static void
|
2006-02-28 05:10:28 +01:00
|
|
|
InitPlan(QueryDesc *queryDesc, int eflags)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2002-12-05 16:50:39 +01:00
|
|
|
CmdType operation = queryDesc->operation;
|
2007-02-20 18:32:18 +01:00
|
|
|
PlannedStmt *plannedstmt = queryDesc->plannedstmt;
|
|
|
|
Plan *plan = plannedstmt->planTree;
|
|
|
|
List *rangeTable = plannedstmt->rtable;
|
2003-08-04 02:43:34 +02:00
|
|
|
EState *estate = queryDesc->estate;
|
2002-12-05 16:50:39 +01:00
|
|
|
PlanState *planstate;
|
1999-05-25 18:15:34 +02:00
|
|
|
TupleDesc tupType;
|
2006-04-30 20:30:40 +02:00
|
|
|
ListCell *l;
|
2007-02-27 02:11:26 +01:00
|
|
|
int i;
|
1997-08-19 06:46:15 +02:00
|
|
|
|
2000-03-09 06:15:33 +01:00
|
|
|
/*
|
2007-02-22 23:00:26 +01:00
|
|
|
* Do permissions checks
|
2000-03-09 06:15:33 +01:00
|
|
|
*/
|
2010-07-22 02:47:59 +02:00
|
|
|
ExecCheckRTPerms(rangeTable, true);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
1999-05-25 18:15:34 +02:00
|
|
|
* initialize the node's execution state
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
estate->es_range_table = rangeTable;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
estate->es_plannedstmt = plannedstmt;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2009-10-10 03:43:50 +02:00
|
|
|
* initialize result relation stuff, and open/lock the result rels.
|
|
|
|
*
|
2010-02-26 03:01:40 +01:00
|
|
|
* We must do this before initializing the plan tree, else we might try to
|
|
|
|
* do a lock upgrade if a result rel is also a source rel.
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2007-02-20 18:32:18 +01:00
|
|
|
if (plannedstmt->resultRelations)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2007-02-20 18:32:18 +01:00
|
|
|
List *resultRelations = plannedstmt->resultRelations;
|
|
|
|
int numResultRelations = list_length(resultRelations);
|
2000-11-12 01:37:02 +01:00
|
|
|
ResultRelInfo *resultRelInfos;
|
2007-02-20 18:32:18 +01:00
|
|
|
ResultRelInfo *resultRelInfo;
|
1999-05-25 18:15:34 +02:00
|
|
|
|
2007-02-20 18:32:18 +01:00
|
|
|
resultRelInfos = (ResultRelInfo *)
|
|
|
|
palloc(numResultRelations * sizeof(ResultRelInfo));
|
|
|
|
resultRelInfo = resultRelInfos;
|
|
|
|
foreach(l, resultRelations)
|
2000-11-12 01:37:02 +01:00
|
|
|
{
|
2007-08-15 23:39:50 +02:00
|
|
|
Index resultRelationIndex = lfirst_int(l);
|
|
|
|
Oid resultRelationOid;
|
|
|
|
Relation resultRelation;
|
|
|
|
|
|
|
|
resultRelationOid = getrelid(resultRelationIndex, rangeTable);
|
|
|
|
resultRelation = heap_open(resultRelationOid, RowExclusiveLock);
|
2017-01-19 18:30:27 +01:00
|
|
|
|
2008-03-28 01:21:56 +01:00
|
|
|
InitResultRelInfo(resultRelInfo,
|
2007-08-15 23:39:50 +02:00
|
|
|
resultRelation,
|
|
|
|
resultRelationIndex,
|
2017-01-04 20:36:34 +01:00
|
|
|
NULL,
|
2005-03-25 22:58:00 +01:00
|
|
|
estate->es_instrument);
|
2007-02-20 18:32:18 +01:00
|
|
|
resultRelInfo++;
|
2000-11-12 01:37:02 +01:00
|
|
|
}
|
|
|
|
estate->es_result_relations = resultRelInfos;
|
|
|
|
estate->es_num_result_relations = numResultRelations;
|
2009-10-10 03:43:50 +02:00
|
|
|
/* es_result_relation_info is NULL except when within ModifyTable */
|
|
|
|
estate->es_result_relation_info = NULL;
|
2017-03-21 14:48:04 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* In the partitioned result relation case, lock the non-leaf result
|
2017-05-01 14:23:01 +02:00
|
|
|
* relations too. A subset of these are the roots of respective
|
2018-06-08 17:55:12 +02:00
|
|
|
* partitioned tables, for which we also allocate ResultRelInfos.
|
2017-03-21 14:48:04 +01:00
|
|
|
*/
|
2017-05-01 14:23:01 +02:00
|
|
|
estate->es_root_result_relations = NULL;
|
|
|
|
estate->es_num_root_result_relations = 0;
|
2017-03-21 14:48:04 +01:00
|
|
|
if (plannedstmt->nonleafResultRelations)
|
|
|
|
{
|
2017-05-17 22:31:56 +02:00
|
|
|
int num_roots = list_length(plannedstmt->rootResultRelations);
|
2017-05-01 14:23:01 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Firstly, build ResultRelInfos for all the partitioned table
|
|
|
|
* roots, because we will need them to fire the statement-level
|
|
|
|
* triggers, if any.
|
|
|
|
*/
|
|
|
|
resultRelInfos = (ResultRelInfo *)
|
2017-05-17 22:31:56 +02:00
|
|
|
palloc(num_roots * sizeof(ResultRelInfo));
|
2017-05-01 14:23:01 +02:00
|
|
|
resultRelInfo = resultRelInfos;
|
|
|
|
foreach(l, plannedstmt->rootResultRelations)
|
|
|
|
{
|
|
|
|
Index resultRelIndex = lfirst_int(l);
|
|
|
|
Oid resultRelOid;
|
|
|
|
Relation resultRelDesc;
|
|
|
|
|
|
|
|
resultRelOid = getrelid(resultRelIndex, rangeTable);
|
|
|
|
resultRelDesc = heap_open(resultRelOid, RowExclusiveLock);
|
|
|
|
InitResultRelInfo(resultRelInfo,
|
|
|
|
resultRelDesc,
|
|
|
|
lfirst_int(l),
|
|
|
|
NULL,
|
|
|
|
estate->es_instrument);
|
|
|
|
resultRelInfo++;
|
|
|
|
}
|
|
|
|
|
|
|
|
estate->es_root_result_relations = resultRelInfos;
|
|
|
|
estate->es_num_root_result_relations = num_roots;
|
|
|
|
|
|
|
|
/* Simply lock the rest of them. */
|
2017-03-21 14:48:04 +01:00
|
|
|
foreach(l, plannedstmt->nonleafResultRelations)
|
|
|
|
{
|
2017-05-17 22:31:56 +02:00
|
|
|
Index resultRelIndex = lfirst_int(l);
|
2017-03-21 14:48:04 +01:00
|
|
|
|
2017-05-01 14:23:01 +02:00
|
|
|
/* We locked the roots above. */
|
|
|
|
if (!list_member_int(plannedstmt->rootResultRelations,
|
|
|
|
resultRelIndex))
|
|
|
|
LockRelationOid(getrelid(resultRelIndex, rangeTable),
|
|
|
|
RowExclusiveLock);
|
2017-03-21 14:48:04 +01:00
|
|
|
}
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
else
|
|
|
|
{
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
1999-05-25 18:15:34 +02:00
|
|
|
* if no result relation, then set state appropriately
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2000-11-12 01:37:02 +01:00
|
|
|
estate->es_result_relations = NULL;
|
|
|
|
estate->es_num_result_relations = 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
estate->es_result_relation_info = NULL;
|
2017-05-01 14:23:01 +02:00
|
|
|
estate->es_root_result_relations = NULL;
|
|
|
|
estate->es_num_root_result_relations = 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
|
2003-01-23 06:10:41 +01:00
|
|
|
/*
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
* Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE
|
2010-02-26 03:01:40 +01:00
|
|
|
* before we initialize the plan tree, else we'd be risking lock upgrades.
|
2017-03-21 14:48:04 +01:00
|
|
|
* While we are at it, build the ExecRowMark list. Any partitioned child
|
|
|
|
* tables are ignored here (because isParent=true) and will be locked by
|
|
|
|
* the first Append or MergeAppend node that references them. (Note that
|
|
|
|
* the RowMarks corresponding to partitioned child tables are present in
|
|
|
|
* the same list as the rest, i.e., plannedstmt->rowMarks.)
|
1999-01-25 13:01:19 +01:00
|
|
|
*/
|
2005-08-01 22:31:16 +02:00
|
|
|
estate->es_rowMarks = NIL;
|
2007-02-20 18:32:18 +01:00
|
|
|
foreach(l, plannedstmt->rowMarks)
|
1999-01-25 13:01:19 +01:00
|
|
|
{
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
PlanRowMark *rc = (PlanRowMark *) lfirst(l);
|
2008-11-15 20:43:47 +01:00
|
|
|
Oid relid;
|
2006-04-30 20:30:40 +02:00
|
|
|
Relation relation;
|
|
|
|
ExecRowMark *erm;
|
|
|
|
|
2008-11-15 20:43:47 +01:00
|
|
|
/* ignore "parent" rowmarks; they are irrelevant at runtime */
|
|
|
|
if (rc->isParent)
|
|
|
|
continue;
|
|
|
|
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
/* get relation's OID (will produce InvalidOid if subquery) */
|
|
|
|
relid = getrelid(rc->rti, rangeTable);
|
|
|
|
|
2015-03-24 20:53:06 +01:00
|
|
|
/*
|
|
|
|
* If you change the conditions under which rel locks are acquired
|
|
|
|
* here, be sure to adjust ExecOpenScanRelation to match.
|
|
|
|
*/
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
switch (rc->markType)
|
|
|
|
{
|
|
|
|
case ROW_MARK_EXCLUSIVE:
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This also causes additional WAL-logging (there was previously a single
WAL record for a locked tuple; now there are as many records as there
exist updated copies of the tuple.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
case ROW_MARK_NOKEYEXCLUSIVE:
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
case ROW_MARK_SHARE:
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This also causes additional WAL-logging (there was previously a single
WAL record for a locked tuple; now there are as many records as there
exist updated copies of the tuple.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
case ROW_MARK_KEYSHARE:
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
relation = heap_open(relid, RowShareLock);
|
|
|
|
break;
|
|
|
|
case ROW_MARK_REFERENCE:
|
|
|
|
relation = heap_open(relid, AccessShareLock);
|
|
|
|
break;
|
|
|
|
case ROW_MARK_COPY:
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
/* no physical table access is required */
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
relation = NULL;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
elog(ERROR, "unrecognized markType: %d", rc->markType);
|
|
|
|
relation = NULL; /* keep compiler quiet */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2011-06-02 20:46:15 +02:00
|
|
|
/* Check that relation is a legal target for marking */
|
|
|
|
if (relation)
|
|
|
|
CheckValidRowMarkRel(relation, rc->markType);
|
2011-02-20 06:17:18 +01:00
|
|
|
|
2006-04-30 20:30:40 +02:00
|
|
|
erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
|
|
|
|
erm->relation = relation;
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
erm->relid = relid;
|
2006-04-30 20:30:40 +02:00
|
|
|
erm->rti = rc->rti;
|
2008-11-15 20:43:47 +01:00
|
|
|
erm->prti = rc->prti;
|
2011-02-10 05:27:07 +01:00
|
|
|
erm->rowmarkId = rc->rowmarkId;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
erm->markType = rc->markType;
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
erm->strength = rc->strength;
|
2014-10-07 22:23:34 +02:00
|
|
|
erm->waitPolicy = rc->waitPolicy;
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
erm->ermActive = false;
|
2008-11-16 18:34:28 +01:00
|
|
|
ItemPointerSetInvalid(&(erm->curCtid));
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
erm->ermExtra = NULL;
|
2006-04-30 20:30:40 +02:00
|
|
|
estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
|
1999-01-25 13:01:19 +01:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2009-10-10 03:43:50 +02:00
|
|
|
* Initialize the executor's tuple table to empty.
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2009-09-27 22:09:58 +02:00
|
|
|
estate->es_tupleTable = NIL;
|
2009-10-10 03:43:50 +02:00
|
|
|
estate->es_trig_tuple_slot = NULL;
|
2009-11-20 21:38:12 +01:00
|
|
|
estate->es_trig_oldtup_slot = NULL;
|
2011-08-22 00:15:55 +02:00
|
|
|
estate->es_trig_newtup_slot = NULL;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2001-05-15 02:33:36 +02:00
|
|
|
/* mark EvalPlanQual not active */
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
estate->es_epqTuple = NULL;
|
|
|
|
estate->es_epqTupleSet = NULL;
|
|
|
|
estate->es_epqScanDone = NULL;
|
2001-05-15 02:33:36 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* Initialize private state information for each SubPlan. We must do this
|
|
|
|
* before running ExecInitNode on the main query tree, since
|
2007-02-27 02:11:26 +01:00
|
|
|
* ExecInitSubPlan expects to be able to find these entries.
|
|
|
|
*/
|
|
|
|
Assert(estate->es_subplanstates == NIL);
|
|
|
|
i = 1; /* subplan indices count from 1 */
|
|
|
|
foreach(l, plannedstmt->subplans)
|
|
|
|
{
|
2007-11-15 22:14:46 +01:00
|
|
|
Plan *subplan = (Plan *) lfirst(l);
|
|
|
|
PlanState *subplanstate;
|
|
|
|
int sp_eflags;
|
2007-02-27 02:11:26 +01:00
|
|
|
|
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
|
|
|
|
* it is a parameterless subplan (not initplan), we suggest that it be
|
|
|
|
* prepared to handle REWIND efficiently; otherwise there is no need.
|
2007-02-27 02:11:26 +01:00
|
|
|
*/
|
2013-11-03 00:38:17 +01:00
|
|
|
sp_eflags = eflags
|
|
|
|
& (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA);
|
2007-02-27 02:11:26 +01:00
|
|
|
if (bms_is_member(i, plannedstmt->rewindPlanIDs))
|
|
|
|
sp_eflags |= EXEC_FLAG_REWIND;
|
|
|
|
|
|
|
|
subplanstate = ExecInitNode(subplan, estate, sp_eflags);
|
|
|
|
|
|
|
|
estate->es_subplanstates = lappend(estate->es_subplanstates,
|
|
|
|
subplanstate);
|
|
|
|
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize the private state information for all the nodes in the query
|
2005-10-15 04:49:52 +02:00
|
|
|
* tree. This opens files, allocates storage and leaves us ready to start
|
|
|
|
* processing tuples.
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2006-02-28 05:10:28 +01:00
|
|
|
planstate = ExecInitNode(plan, estate, eflags);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
Restructure SELECT INTO's parsetree representation into CreateTableAsStmt.
Making this operation look like a utility statement seems generally a good
idea, and particularly so in light of the desire to provide command
triggers for utility statements. The original choice of representing it as
SELECT with an IntoClause appendage had metastasized into rather a lot of
places, unfortunately, so that this patch is a great deal more complicated
than one might at first expect.
In particular, keeping EXPLAIN working for SELECT INTO and CREATE TABLE AS
subcommands required restructuring some EXPLAIN-related APIs. Add-on code
that calls ExplainOnePlan or ExplainOneUtility, or uses
ExplainOneQuery_hook, will need adjustment.
Also, the cases PREPARE ... SELECT INTO and CREATE RULE ... SELECT INTO,
which formerly were accepted though undocumented, are no longer accepted.
The PREPARE case can be replaced with use of CREATE TABLE AS EXECUTE.
The CREATE RULE case doesn't seem to have much real-world use (since the
rule would work only once before failing with "table already exists"),
so we'll not bother with that one.
Both SELECT INTO and CREATE TABLE AS still return a command tag of
"SELECT nnnn". There was some discussion of returning "CREATE TABLE nnnn",
but for the moment backwards compatibility wins the day.
Andres Freund and Tom Lane
2012-03-20 02:37:19 +01:00
|
|
|
* Get the tuple descriptor describing the type of tuples to return.
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2003-05-05 19:57:47 +02:00
|
|
|
tupType = ExecGetResultType(planstate);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* Initialize the junk filter if needed. SELECT queries need a filter if
|
|
|
|
* there are any junk attrs in the top-level tlist.
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2009-10-10 03:43:50 +02:00
|
|
|
if (operation == CMD_SELECT)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1998-09-01 06:40:42 +02:00
|
|
|
bool junk_filter_needed = false;
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *tlist;
|
1998-09-01 06:40:42 +02:00
|
|
|
|
2009-10-10 03:43:50 +02:00
|
|
|
foreach(tlist, plan->targetlist)
|
1998-07-19 05:46:29 +02:00
|
|
|
{
|
2009-10-10 03:43:50 +02:00
|
|
|
TargetEntry *tle = (TargetEntry *) lfirst(tlist);
|
1999-10-31 01:13:30 +02:00
|
|
|
|
2009-10-10 03:43:50 +02:00
|
|
|
if (tle->resjunk)
|
|
|
|
{
|
1999-10-31 01:13:30 +02:00
|
|
|
junk_filter_needed = true;
|
|
|
|
break;
|
2009-10-10 03:43:50 +02:00
|
|
|
}
|
1998-07-19 05:46:29 +02:00
|
|
|
}
|
|
|
|
|
1999-10-31 01:13:30 +02:00
|
|
|
if (junk_filter_needed)
|
1998-07-19 05:46:29 +02:00
|
|
|
{
|
2009-10-10 03:43:50 +02:00
|
|
|
JunkFilter *j;
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2009-10-10 03:43:50 +02:00
|
|
|
j = ExecInitJunkFilter(planstate->plan->targetlist,
|
|
|
|
tupType->tdhasoid,
|
2018-02-17 06:17:38 +01:00
|
|
|
ExecInitExtraTupleSlot(estate, NULL));
|
2009-10-10 03:43:50 +02:00
|
|
|
estate->es_junkFilter = j;
|
1998-09-01 06:40:42 +02:00
|
|
|
|
2009-10-10 03:43:50 +02:00
|
|
|
/* Want to return the cleaned tuple type */
|
|
|
|
tupType = j->jf_cleanTupType;
|
2008-04-21 05:49:45 +02:00
|
|
|
}
|
1998-07-19 05:46:29 +02:00
|
|
|
}
|
1998-09-01 06:40:42 +02:00
|
|
|
|
2002-12-05 16:50:39 +01:00
|
|
|
queryDesc->tupDesc = tupType;
|
|
|
|
queryDesc->planstate = planstate;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2000-11-12 01:37:02 +01:00
|
|
|
/*
|
2011-02-26 00:56:23 +01:00
|
|
|
* Check that a proposed result relation is a legal target for the operation
|
|
|
|
*
|
2012-12-09 00:25:48 +01:00
|
|
|
* Generally the parser and/or planner should have noticed any such mistake
|
|
|
|
* already, but let's make sure.
|
2011-06-02 20:46:15 +02:00
|
|
|
*
|
|
|
|
* Note: when changing this function, you probably also need to look at
|
|
|
|
* CheckValidRowMarkRel.
|
2000-11-12 01:37:02 +01:00
|
|
|
*/
|
2008-03-28 01:21:56 +01:00
|
|
|
void
|
2017-09-07 16:55:45 +02:00
|
|
|
CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation)
|
2000-11-12 01:37:02 +01:00
|
|
|
{
|
2017-09-07 16:55:45 +02:00
|
|
|
Relation resultRel = resultRelInfo->ri_RelationDesc;
|
2011-04-10 17:42:00 +02:00
|
|
|
TriggerDesc *trigDesc = resultRel->trigdesc;
|
2013-03-10 19:14:53 +01:00
|
|
|
FdwRoutine *fdwroutine;
|
2010-10-10 19:43:33 +02:00
|
|
|
|
2011-02-26 00:56:23 +01:00
|
|
|
switch (resultRel->rd_rel->relkind)
|
2000-11-12 01:37:02 +01:00
|
|
|
{
|
2007-08-15 23:39:50 +02:00
|
|
|
case RELKIND_RELATION:
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
case RELKIND_PARTITIONED_TABLE:
|
2017-01-19 18:00:00 +01:00
|
|
|
CheckCmdReplicaIdentity(resultRel, operation);
|
2007-08-15 23:39:50 +02:00
|
|
|
break;
|
2000-11-12 01:37:02 +01:00
|
|
|
case RELKIND_SEQUENCE:
|
2003-07-21 19:05:12 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
2003-09-25 08:58:07 +02:00
|
|
|
errmsg("cannot change sequence \"%s\"",
|
2011-02-26 00:56:23 +01:00
|
|
|
RelationGetRelationName(resultRel))));
|
2000-11-12 01:37:02 +01:00
|
|
|
break;
|
|
|
|
case RELKIND_TOASTVALUE:
|
2003-07-21 19:05:12 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
2003-09-25 08:58:07 +02:00
|
|
|
errmsg("cannot change TOAST relation \"%s\"",
|
2011-02-26 00:56:23 +01:00
|
|
|
RelationGetRelationName(resultRel))));
|
2000-11-12 01:37:02 +01:00
|
|
|
break;
|
|
|
|
case RELKIND_VIEW:
|
2013-05-29 22:58:43 +02:00
|
|
|
|
2012-12-09 00:25:48 +01:00
|
|
|
/*
|
|
|
|
* Okay only if there's a suitable INSTEAD OF trigger. Messages
|
|
|
|
* here should match rewriteHandler.c's rewriteTargetView, except
|
|
|
|
* that we omit errdetail because we haven't got the information
|
2013-05-29 22:58:43 +02:00
|
|
|
* handy (and given that we really shouldn't get here anyway, it's
|
|
|
|
* not worth great exertion to get).
|
2012-12-09 00:25:48 +01:00
|
|
|
*/
|
2010-10-10 19:43:33 +02:00
|
|
|
switch (operation)
|
|
|
|
{
|
|
|
|
case CMD_INSERT:
|
|
|
|
if (!trigDesc || !trigDesc->trig_insert_instead_row)
|
|
|
|
ereport(ERROR,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
|
|
errmsg("cannot insert into view \"%s\"",
|
|
|
|
RelationGetRelationName(resultRel)),
|
|
|
|
errhint("To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.")));
|
2010-10-10 19:43:33 +02:00
|
|
|
break;
|
|
|
|
case CMD_UPDATE:
|
|
|
|
if (!trigDesc || !trigDesc->trig_update_instead_row)
|
|
|
|
ereport(ERROR,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
|
|
errmsg("cannot update view \"%s\"",
|
|
|
|
RelationGetRelationName(resultRel)),
|
|
|
|
errhint("To enable updating the view, provide an INSTEAD OF UPDATE trigger or an unconditional ON UPDATE DO INSTEAD rule.")));
|
2010-10-10 19:43:33 +02:00
|
|
|
break;
|
|
|
|
case CMD_DELETE:
|
|
|
|
if (!trigDesc || !trigDesc->trig_delete_instead_row)
|
|
|
|
ereport(ERROR,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
|
|
errmsg("cannot delete from view \"%s\"",
|
|
|
|
RelationGetRelationName(resultRel)),
|
|
|
|
errhint("To enable deleting from the view, provide an INSTEAD OF DELETE trigger or an unconditional ON DELETE DO INSTEAD rule.")));
|
2010-10-10 19:43:33 +02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
elog(ERROR, "unrecognized CmdType: %d", (int) operation);
|
|
|
|
break;
|
|
|
|
}
|
2000-11-12 01:37:02 +01:00
|
|
|
break;
|
2013-03-04 01:23:31 +01:00
|
|
|
case RELKIND_MATVIEW:
|
2013-07-16 19:55:44 +02:00
|
|
|
if (!MatViewIncrementalMaintenanceIsEnabled())
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
|
|
|
errmsg("cannot change materialized view \"%s\"",
|
|
|
|
RelationGetRelationName(resultRel))));
|
2013-03-04 01:23:31 +01:00
|
|
|
break;
|
2011-01-02 05:48:11 +01:00
|
|
|
case RELKIND_FOREIGN_TABLE:
|
2013-03-10 19:14:53 +01:00
|
|
|
/* Okay only if the FDW supports it */
|
2017-09-07 16:55:45 +02:00
|
|
|
fdwroutine = resultRelInfo->ri_FdwRoutine;
|
2013-03-10 19:14:53 +01:00
|
|
|
switch (operation)
|
|
|
|
{
|
|
|
|
case CMD_INSERT:
|
|
|
|
if (fdwroutine->ExecForeignInsert == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errmsg("cannot insert into foreign table \"%s\"",
|
|
|
|
RelationGetRelationName(resultRel))));
|
2013-06-12 23:52:54 +02:00
|
|
|
if (fdwroutine->IsForeignRelUpdatable != NULL &&
|
|
|
|
(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_INSERT)) == 0)
|
|
|
|
ereport(ERROR,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
|
|
errmsg("foreign table \"%s\" does not allow inserts",
|
|
|
|
RelationGetRelationName(resultRel))));
|
2013-03-10 19:14:53 +01:00
|
|
|
break;
|
|
|
|
case CMD_UPDATE:
|
|
|
|
if (fdwroutine->ExecForeignUpdate == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("cannot update foreign table \"%s\"",
|
|
|
|
RelationGetRelationName(resultRel))));
|
2013-06-12 23:52:54 +02:00
|
|
|
if (fdwroutine->IsForeignRelUpdatable != NULL &&
|
|
|
|
(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_UPDATE)) == 0)
|
|
|
|
ereport(ERROR,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
|
|
errmsg("foreign table \"%s\" does not allow updates",
|
|
|
|
RelationGetRelationName(resultRel))));
|
2013-03-10 19:14:53 +01:00
|
|
|
break;
|
|
|
|
case CMD_DELETE:
|
|
|
|
if (fdwroutine->ExecForeignDelete == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errmsg("cannot delete from foreign table \"%s\"",
|
|
|
|
RelationGetRelationName(resultRel))));
|
2013-06-12 23:52:54 +02:00
|
|
|
if (fdwroutine->IsForeignRelUpdatable != NULL &&
|
|
|
|
(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_DELETE)) == 0)
|
|
|
|
ereport(ERROR,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
|
|
errmsg("foreign table \"%s\" does not allow deletes",
|
|
|
|
RelationGetRelationName(resultRel))));
|
2013-03-10 19:14:53 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
elog(ERROR, "unrecognized CmdType: %d", (int) operation);
|
|
|
|
break;
|
|
|
|
}
|
2011-01-02 05:48:11 +01:00
|
|
|
break;
|
2007-08-15 23:39:50 +02:00
|
|
|
default:
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
|
|
|
errmsg("cannot change relation \"%s\"",
|
2011-02-26 00:56:23 +01:00
|
|
|
RelationGetRelationName(resultRel))));
|
2007-08-15 23:39:50 +02:00
|
|
|
break;
|
2000-11-12 01:37:02 +01:00
|
|
|
}
|
2011-02-26 00:56:23 +01:00
|
|
|
}
|
2000-11-12 01:37:02 +01:00
|
|
|
|
2011-06-02 20:46:15 +02:00
|
|
|
/*
|
|
|
|
* Check that a proposed rowmark target relation is a legal target
|
|
|
|
*
|
|
|
|
* In most cases parser and/or planner should have noticed this already, but
|
|
|
|
* they don't cover all cases.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
CheckValidRowMarkRel(Relation rel, RowMarkType markType)
|
|
|
|
{
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
FdwRoutine *fdwroutine;
|
|
|
|
|
2011-06-02 20:46:15 +02:00
|
|
|
switch (rel->rd_rel->relkind)
|
|
|
|
{
|
|
|
|
case RELKIND_RELATION:
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
case RELKIND_PARTITIONED_TABLE:
|
2011-06-02 20:46:15 +02:00
|
|
|
/* OK */
|
|
|
|
break;
|
|
|
|
case RELKIND_SEQUENCE:
|
|
|
|
/* Must disallow this because we don't vacuum sequences */
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
|
|
|
errmsg("cannot lock rows in sequence \"%s\"",
|
|
|
|
RelationGetRelationName(rel))));
|
|
|
|
break;
|
|
|
|
case RELKIND_TOASTVALUE:
|
|
|
|
/* We could allow this, but there seems no good reason to */
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
|
|
|
errmsg("cannot lock rows in TOAST relation \"%s\"",
|
|
|
|
RelationGetRelationName(rel))));
|
|
|
|
break;
|
|
|
|
case RELKIND_VIEW:
|
2012-12-09 00:25:48 +01:00
|
|
|
/* Should not get here; planner should have expanded the view */
|
2011-06-02 20:46:15 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
|
|
|
errmsg("cannot lock rows in view \"%s\"",
|
|
|
|
RelationGetRelationName(rel))));
|
|
|
|
break;
|
2013-03-04 01:23:31 +01:00
|
|
|
case RELKIND_MATVIEW:
|
2014-03-06 17:37:02 +01:00
|
|
|
/* Allow referencing a matview, but not actual locking clauses */
|
|
|
|
if (markType != ROW_MARK_REFERENCE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errmsg("cannot lock rows in materialized view \"%s\"",
|
|
|
|
RelationGetRelationName(rel))));
|
2013-03-04 01:23:31 +01:00
|
|
|
break;
|
2011-06-02 20:46:15 +02:00
|
|
|
case RELKIND_FOREIGN_TABLE:
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
/* Okay only if the FDW supports it */
|
|
|
|
fdwroutine = GetFdwRoutineForRelation(rel, false);
|
|
|
|
if (fdwroutine->RefetchForeignRow == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("cannot lock rows in foreign table \"%s\"",
|
|
|
|
RelationGetRelationName(rel))));
|
2011-06-02 20:46:15 +02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
|
|
|
errmsg("cannot lock rows in relation \"%s\"",
|
|
|
|
RelationGetRelationName(rel))));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-02-26 00:56:23 +01:00
|
|
|
/*
|
|
|
|
* Initialize ResultRelInfo data for one result relation
|
|
|
|
*
|
|
|
|
* Caution: before Postgres 9.1, this function included the relkind checking
|
|
|
|
* that's now in CheckValidResultRel, and it also did ExecOpenIndices if
|
|
|
|
* appropriate. Be sure callers cover those needs.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
InitResultRelInfo(ResultRelInfo *resultRelInfo,
|
|
|
|
Relation resultRelationDesc,
|
|
|
|
Index resultRelationIndex,
|
2017-01-04 20:36:34 +01:00
|
|
|
Relation partition_root,
|
2011-02-26 00:56:23 +01:00
|
|
|
int instrument_options)
|
|
|
|
{
|
2017-01-24 16:20:02 +01:00
|
|
|
List *partition_check = NIL;
|
2017-01-19 18:30:27 +01:00
|
|
|
|
2000-11-12 01:37:02 +01:00
|
|
|
MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
|
|
|
|
resultRelInfo->type = T_ResultRelInfo;
|
|
|
|
resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
|
|
|
|
resultRelInfo->ri_RelationDesc = resultRelationDesc;
|
|
|
|
resultRelInfo->ri_NumIndices = 0;
|
|
|
|
resultRelInfo->ri_IndexRelationDescs = NULL;
|
|
|
|
resultRelInfo->ri_IndexRelationInfo = NULL;
|
2002-10-14 18:51:30 +02:00
|
|
|
/* make a copy so as not to depend on relcache info not changing... */
|
2011-02-26 00:56:23 +01:00
|
|
|
resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
|
2005-03-25 22:58:00 +01:00
|
|
|
if (resultRelInfo->ri_TrigDesc)
|
|
|
|
{
|
2005-10-15 04:49:52 +02:00
|
|
|
int n = resultRelInfo->ri_TrigDesc->numtriggers;
|
2005-03-25 22:58:00 +01:00
|
|
|
|
|
|
|
resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
|
|
|
|
palloc0(n * sizeof(FmgrInfo));
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure for to later reduce the per executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer. The behavior
around might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
|
|
|
resultRelInfo->ri_TrigWhenExprs = (ExprState **)
|
|
|
|
palloc0(n * sizeof(ExprState *));
|
2009-12-15 05:57:48 +01:00
|
|
|
if (instrument_options)
|
|
|
|
resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options);
|
2005-03-25 22:58:00 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
resultRelInfo->ri_TrigFunctions = NULL;
|
2009-11-20 21:38:12 +01:00
|
|
|
resultRelInfo->ri_TrigWhenExprs = NULL;
|
2005-03-25 22:58:00 +01:00
|
|
|
resultRelInfo->ri_TrigInstrument = NULL;
|
|
|
|
}
|
2013-03-10 19:14:53 +01:00
|
|
|
if (resultRelationDesc->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
|
|
|
|
resultRelInfo->ri_FdwRoutine = GetFdwRoutineForRelation(resultRelationDesc, true);
|
|
|
|
else
|
|
|
|
resultRelInfo->ri_FdwRoutine = NULL;
|
2018-03-26 15:43:54 +02:00
|
|
|
|
|
|
|
/* The following fields are set later if needed */
|
2013-03-10 19:14:53 +01:00
|
|
|
resultRelInfo->ri_FdwState = NULL;
|
2016-03-18 18:48:58 +01:00
|
|
|
resultRelInfo->ri_usesFdwDirectModify = false;
|
2000-11-12 01:37:02 +01:00
|
|
|
resultRelInfo->ri_ConstraintExprs = NULL;
|
|
|
|
resultRelInfo->ri_junkFilter = NULL;
|
2006-08-12 04:52:06 +02:00
|
|
|
resultRelInfo->ri_projectReturning = NULL;
|
2018-03-26 15:43:54 +02:00
|
|
|
resultRelInfo->ri_onConflictArbiterIndexes = NIL;
|
|
|
|
resultRelInfo->ri_onConflict = NULL;
|
2017-01-19 18:30:27 +01:00
|
|
|
|
2017-01-04 20:36:34 +01:00
|
|
|
/*
|
2017-06-07 18:45:32 +02:00
|
|
|
* Partition constraint, which also includes the partition constraint of
|
|
|
|
* all the ancestors that are partitions. Note that it will be checked
|
|
|
|
* even in the case of tuple-routing where this table is the target leaf
|
|
|
|
* partition, if there any BR triggers defined on the table. Although
|
|
|
|
* tuple-routing implicitly preserves the partition constraint of the
|
|
|
|
* target partition for a given row, the BR triggers may change the row
|
2017-06-13 19:05:59 +02:00
|
|
|
* such that the constraint is no longer satisfied, which we must fail for
|
|
|
|
* by checking it explicitly.
|
2017-06-07 18:45:32 +02:00
|
|
|
*
|
|
|
|
* If this is a partitioned table, the partition constraint (if any) of a
|
|
|
|
* given row will be checked just before performing tuple-routing.
|
2017-01-04 20:36:34 +01:00
|
|
|
*/
|
2017-06-07 18:45:32 +02:00
|
|
|
partition_check = RelationGetPartitionQual(resultRelationDesc);
|
2017-01-19 18:30:27 +01:00
|
|
|
|
|
|
|
resultRelInfo->ri_PartitionCheck = partition_check;
|
2017-01-04 20:36:34 +01:00
|
|
|
resultRelInfo->ri_PartitionRoot = partition_root;
|
2018-04-07 01:16:11 +02:00
|
|
|
resultRelInfo->ri_PartitionReadyForRouting = false;
|
2000-11-12 01:37:02 +01:00
|
|
|
}
|
|
|
|
|
2007-08-15 23:39:50 +02:00
|
|
|
/*
|
|
|
|
* ExecGetTriggerResultRel
|
|
|
|
*
|
|
|
|
* Get a ResultRelInfo for a trigger target relation. Most of the time,
|
|
|
|
* triggers are fired on one of the result relations of the query, and so
|
2017-08-18 19:01:05 +02:00
|
|
|
* we can just return a member of the es_result_relations array, the
|
|
|
|
* es_root_result_relations array (if any), or the es_leaf_result_relations
|
|
|
|
* list (if any). (Note: in self-join situations there might be multiple
|
|
|
|
* members with the same OID; if so it doesn't matter which one we pick.)
|
|
|
|
* However, it is sometimes necessary to fire triggers on other relations;
|
|
|
|
* this happens mainly when an RI update trigger queues additional triggers
|
|
|
|
* on other relations, which will be processed in the context of the outer
|
|
|
|
* query. For efficiency's sake, we want to have a ResultRelInfo for those
|
|
|
|
* triggers too; that can avoid repeated re-opening of the relation. (It
|
|
|
|
* also provides a way for EXPLAIN ANALYZE to report the runtimes of such
|
|
|
|
* triggers.) So we make additional ResultRelInfo's as needed, and save them
|
|
|
|
* in es_trig_target_relations.
|
2007-08-15 23:39:50 +02:00
|
|
|
*/
|
|
|
|
ResultRelInfo *
|
|
|
|
ExecGetTriggerResultRel(EState *estate, Oid relid)
|
|
|
|
{
|
|
|
|
ResultRelInfo *rInfo;
|
|
|
|
int nr;
|
|
|
|
ListCell *l;
|
|
|
|
Relation rel;
|
|
|
|
MemoryContext oldcontext;
|
|
|
|
|
|
|
|
/* First, search through the query result relations */
|
|
|
|
rInfo = estate->es_result_relations;
|
|
|
|
nr = estate->es_num_result_relations;
|
|
|
|
while (nr > 0)
|
|
|
|
{
|
|
|
|
if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
|
|
|
|
return rInfo;
|
|
|
|
rInfo++;
|
|
|
|
nr--;
|
|
|
|
}
|
2017-08-18 19:01:05 +02:00
|
|
|
/* Second, search through the root result relations, if any */
|
|
|
|
rInfo = estate->es_root_result_relations;
|
|
|
|
nr = estate->es_num_root_result_relations;
|
|
|
|
while (nr > 0)
|
|
|
|
{
|
|
|
|
if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
|
|
|
|
return rInfo;
|
|
|
|
rInfo++;
|
|
|
|
nr--;
|
|
|
|
}
|
2018-04-26 20:47:16 +02:00
|
|
|
|
2018-02-08 20:29:05 +01:00
|
|
|
/*
|
|
|
|
* Third, search through the result relations that were created during
|
|
|
|
* tuple routing, if any.
|
|
|
|
*/
|
|
|
|
foreach(l, estate->es_tuple_routing_result_relations)
|
2017-08-18 19:01:05 +02:00
|
|
|
{
|
|
|
|
rInfo = (ResultRelInfo *) lfirst(l);
|
|
|
|
if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
|
|
|
|
return rInfo;
|
|
|
|
}
|
2007-08-15 23:39:50 +02:00
|
|
|
/* Nope, but maybe we already made an extra ResultRelInfo for it */
|
|
|
|
foreach(l, estate->es_trig_target_relations)
|
|
|
|
{
|
|
|
|
rInfo = (ResultRelInfo *) lfirst(l);
|
|
|
|
if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
|
|
|
|
return rInfo;
|
|
|
|
}
|
|
|
|
/* Nope, so we need a new one */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Open the target relation's relcache entry. We assume that an
|
2007-11-15 22:14:46 +01:00
|
|
|
* appropriate lock is still held by the backend from whenever the trigger
|
2014-05-06 18:12:18 +02:00
|
|
|
* event got queued, so we need take no new lock here. Also, we need not
|
2011-04-10 17:42:00 +02:00
|
|
|
* recheck the relkind, so no need for CheckValidResultRel.
|
2007-08-15 23:39:50 +02:00
|
|
|
*/
|
|
|
|
rel = heap_open(relid, NoLock);
|
|
|
|
|
|
|
|
/*
|
2011-02-26 00:56:23 +01:00
|
|
|
* Make the new entry in the right context.
|
2007-08-15 23:39:50 +02:00
|
|
|
*/
|
|
|
|
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
|
|
rInfo = makeNode(ResultRelInfo);
|
2008-03-28 01:21:56 +01:00
|
|
|
InitResultRelInfo(rInfo,
|
2007-08-15 23:39:50 +02:00
|
|
|
rel,
|
|
|
|
0, /* dummy rangetable index */
|
2017-01-04 20:36:34 +01:00
|
|
|
NULL,
|
2007-08-15 23:39:50 +02:00
|
|
|
estate->es_instrument);
|
|
|
|
estate->es_trig_target_relations =
|
|
|
|
lappend(estate->es_trig_target_relations, rInfo);
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
|
2011-02-26 00:56:23 +01:00
|
|
|
/*
|
|
|
|
* Currently, we don't need any index information in ResultRelInfos used
|
|
|
|
* only for triggers, so no need to call ExecOpenIndices.
|
|
|
|
*/
|
|
|
|
|
2007-08-15 23:39:50 +02:00
|
|
|
return rInfo;
|
|
|
|
}
|
|
|
|
|
2017-05-16 18:46:32 +02:00
|
|
|
/*
|
|
|
|
* Close any relations that have been opened by ExecGetTriggerResultRel().
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ExecCleanUpTriggerState(EState *estate)
|
|
|
|
{
|
|
|
|
ListCell *l;
|
|
|
|
|
|
|
|
foreach(l, estate->es_trig_target_relations)
|
|
|
|
{
|
|
|
|
ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);
|
|
|
|
|
|
|
|
/* Close indices and then the relation itself */
|
|
|
|
ExecCloseIndices(resultRelInfo);
|
|
|
|
heap_close(resultRelInfo->ri_RelationDesc, NoLock);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-01-22 03:23:21 +01:00
|
|
|
/*
|
|
|
|
* ExecContextForcesOids
|
|
|
|
*
|
Restructure SELECT INTO's parsetree representation into CreateTableAsStmt.
Making this operation look like a utility statement seems generally a good
idea, and particularly so in light of the desire to provide command
triggers for utility statements. The original choice of representing it as
SELECT with an IntoClause appendage had metastasized into rather a lot of
places, unfortunately, so that this patch is a great deal more complicated
than one might at first expect.
In particular, keeping EXPLAIN working for SELECT INTO and CREATE TABLE AS
subcommands required restructuring some EXPLAIN-related APIs. Add-on code
that calls ExplainOnePlan or ExplainOneUtility, or uses
ExplainOneQuery_hook, will need adjustment.
Also, the cases PREPARE ... SELECT INTO and CREATE RULE ... SELECT INTO,
which formerly were accepted though undocumented, are no longer accepted.
The PREPARE case can be replaced with use of CREATE TABLE AS EXECUTE.
The CREATE RULE case doesn't seem to have much real-world use (since the
rule would work only once before failing with "table already exists"),
so we'll not bother with that one.
Both SELECT INTO and CREATE TABLE AS still return a command tag of
"SELECT nnnn". There was some discussion of returning "CREATE TABLE nnnn",
but for the moment backwards compatibility wins the day.
Andres Freund and Tom Lane
2012-03-20 02:37:19 +01:00
|
|
|
* This is pretty grotty: when doing INSERT, UPDATE, or CREATE TABLE AS,
|
2004-01-22 03:23:21 +01:00
|
|
|
* we need to ensure that result tuples have space for an OID iff they are
|
|
|
|
* going to be stored into a relation that has OIDs. In other contexts
|
|
|
|
* we are free to choose whether to leave space for OIDs in result tuples
|
|
|
|
* (we generally don't want to, but we do if a physical-tlist optimization
|
2017-08-16 06:22:32 +02:00
|
|
|
* is possible). This routine checks the plan context and returns true if the
|
|
|
|
* choice is forced, false if the choice is not forced. In the true case,
|
2004-01-22 03:23:21 +01:00
|
|
|
* *hasoids is set to the required value.
|
|
|
|
*
|
|
|
|
* One reason this is ugly is that all plan nodes in the plan tree will emit
|
|
|
|
* tuples with space for an OID, though we really only need the topmost node
|
|
|
|
* to do so. However, node types like Sort don't project new tuples but just
|
|
|
|
* return their inputs, and in those cases the requirement propagates down
|
|
|
|
* to the input node. Eventually we might make this code smart enough to
|
|
|
|
* recognize how far down the requirement really goes, but for now we just
|
|
|
|
* make all plan nodes do the same thing if the top level forces the choice.
|
|
|
|
*
|
2009-10-10 03:43:50 +02:00
|
|
|
* We assume that if we are generating tuples for INSERT or UPDATE,
|
|
|
|
* estate->es_result_relation_info is already set up to describe the target
|
|
|
|
* relation. Note that in an UPDATE that spans an inheritance tree, some of
|
|
|
|
* the target relations may have OIDs and some not. We have to make the
|
|
|
|
* decisions on a per-relation basis as we initialize each of the subplans of
|
|
|
|
* the ModifyTable node, so ModifyTable has to set es_result_relation_info
|
|
|
|
* while initializing each subplan.
|
2004-01-22 03:23:21 +01:00
|
|
|
*
|
Restructure SELECT INTO's parsetree representation into CreateTableAsStmt.
Making this operation look like a utility statement seems generally a good
idea, and particularly so in light of the desire to provide command
triggers for utility statements. The original choice of representing it as
SELECT with an IntoClause appendage had metastasized into rather a lot of
places, unfortunately, so that this patch is a great deal more complicated
than one might at first expect.
In particular, keeping EXPLAIN working for SELECT INTO and CREATE TABLE AS
subcommands required restructuring some EXPLAIN-related APIs. Add-on code
that calls ExplainOnePlan or ExplainOneUtility, or uses
ExplainOneQuery_hook, will need adjustment.
Also, the cases PREPARE ... SELECT INTO and CREATE RULE ... SELECT INTO,
which formerly were accepted though undocumented, are no longer accepted.
The PREPARE case can be replaced with use of CREATE TABLE AS EXECUTE.
The CREATE RULE case doesn't seem to have much real-world use (since the
rule would work only once before failing with "table already exists"),
so we'll not bother with that one.
Both SELECT INTO and CREATE TABLE AS still return a command tag of
"SELECT nnnn". There was some discussion of returning "CREATE TABLE nnnn",
but for the moment backwards compatibility wins the day.
Andres Freund and Tom Lane
2012-03-20 02:37:19 +01:00
|
|
|
* CREATE TABLE AS is even uglier, because we don't have the target relation's
|
|
|
|
* descriptor available when this code runs; we have to look aside at the
|
|
|
|
* flags passed to ExecutorStart().
|
2004-01-22 03:23:21 +01:00
|
|
|
*/
|
|
|
|
bool
|
|
|
|
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
|
|
|
|
{
|
2009-10-10 03:43:50 +02:00
|
|
|
ResultRelInfo *ri = planstate->state->es_result_relation_info;
|
|
|
|
|
|
|
|
if (ri != NULL)
|
2004-01-22 03:23:21 +01:00
|
|
|
{
|
2009-10-10 03:43:50 +02:00
|
|
|
Relation rel = ri->ri_RelationDesc;
|
2004-01-22 03:23:21 +01:00
|
|
|
|
2009-10-10 03:43:50 +02:00
|
|
|
if (rel != NULL)
|
2004-01-22 03:23:21 +01:00
|
|
|
{
|
2009-10-10 03:43:50 +02:00
|
|
|
*hasoids = rel->rd_rel->relhasoids;
|
|
|
|
return true;
|
2004-01-22 03:23:21 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Restructure SELECT INTO's parsetree representation into CreateTableAsStmt.
Making this operation look like a utility statement seems generally a good
idea, and particularly so in light of the desire to provide command
triggers for utility statements. The original choice of representing it as
SELECT with an IntoClause appendage had metastasized into rather a lot of
places, unfortunately, so that this patch is a great deal more complicated
than one might at first expect.
In particular, keeping EXPLAIN working for SELECT INTO and CREATE TABLE AS
subcommands required restructuring some EXPLAIN-related APIs. Add-on code
that calls ExplainOnePlan or ExplainOneUtility, or uses
ExplainOneQuery_hook, will need adjustment.
Also, the cases PREPARE ... SELECT INTO and CREATE RULE ... SELECT INTO,
which formerly were accepted though undocumented, are no longer accepted.
The PREPARE case can be replaced with use of CREATE TABLE AS EXECUTE.
The CREATE RULE case doesn't seem to have much real-world use (since the
rule would work only once before failing with "table already exists"),
so we'll not bother with that one.
Both SELECT INTO and CREATE TABLE AS still return a command tag of
"SELECT nnnn". There was some discussion of returning "CREATE TABLE nnnn",
but for the moment backwards compatibility wins the day.
Andres Freund and Tom Lane
2012-03-20 02:37:19 +01:00
|
|
|
if (planstate->state->es_top_eflags & EXEC_FLAG_WITH_OIDS)
|
2009-10-10 03:43:50 +02:00
|
|
|
{
|
Restructure SELECT INTO's parsetree representation into CreateTableAsStmt.
Making this operation look like a utility statement seems generally a good
idea, and particularly so in light of the desire to provide command
triggers for utility statements. The original choice of representing it as
SELECT with an IntoClause appendage had metastasized into rather a lot of
places, unfortunately, so that this patch is a great deal more complicated
than one might at first expect.
In particular, keeping EXPLAIN working for SELECT INTO and CREATE TABLE AS
subcommands required restructuring some EXPLAIN-related APIs. Add-on code
that calls ExplainOnePlan or ExplainOneUtility, or uses
ExplainOneQuery_hook, will need adjustment.
Also, the cases PREPARE ... SELECT INTO and CREATE RULE ... SELECT INTO,
which formerly were accepted though undocumented, are no longer accepted.
The PREPARE case can be replaced with use of CREATE TABLE AS EXECUTE.
The CREATE RULE case doesn't seem to have much real-world use (since the
rule would work only once before failing with "table already exists"),
so we'll not bother with that one.
Both SELECT INTO and CREATE TABLE AS still return a command tag of
"SELECT nnnn". There was some discussion of returning "CREATE TABLE nnnn",
but for the moment backwards compatibility wins the day.
Andres Freund and Tom Lane
2012-03-20 02:37:19 +01:00
|
|
|
*hasoids = true;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (planstate->state->es_top_eflags & EXEC_FLAG_WITHOUT_OIDS)
|
|
|
|
{
|
|
|
|
*hasoids = false;
|
2009-10-10 03:43:50 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2004-01-22 03:23:21 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-02-26 00:56:23 +01:00
|
|
|
/* ----------------------------------------------------------------
|
|
|
|
* ExecPostprocessPlan
|
|
|
|
*
|
|
|
|
* Give plan nodes a final chance to execute before shutdown
|
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
ExecPostprocessPlan(EState *estate)
|
|
|
|
{
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make sure nodes run forward.
|
|
|
|
*/
|
|
|
|
estate->es_direction = ForwardScanDirection;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Run any secondary ModifyTable nodes to completion, in case the main
|
2014-05-06 18:12:18 +02:00
|
|
|
* query did not fetch all rows from them. (We do this to ensure that
|
2011-02-26 00:56:23 +01:00
|
|
|
* such nodes have predictable results.)
|
|
|
|
*/
|
|
|
|
foreach(lc, estate->es_auxmodifytables)
|
|
|
|
{
|
2011-04-10 17:42:00 +02:00
|
|
|
PlanState *ps = (PlanState *) lfirst(lc);
|
2011-02-26 00:56:23 +01:00
|
|
|
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
TupleTableSlot *slot;
|
|
|
|
|
|
|
|
/* Reset the per-output-tuple exprcontext each time */
|
|
|
|
ResetPerTupleExprContext(estate);
|
|
|
|
|
|
|
|
slot = ExecProcNode(ps);
|
|
|
|
|
|
|
|
if (TupIsNull(slot))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/* ----------------------------------------------------------------
|
2002-12-15 17:17:59 +01:00
|
|
|
* ExecEndPlan
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
2002-12-05 16:50:39 +01:00
|
|
|
* Cleans up the query plan -- closes files and frees up storage
|
2002-12-15 22:01:34 +01:00
|
|
|
*
|
|
|
|
* NOTE: we are no longer very worried about freeing storage per se
|
|
|
|
* in this code; FreeExecutorState should be guaranteed to release all
|
|
|
|
* memory that needs to be released. What we are worried about doing
|
|
|
|
* is closing relations and dropping buffer pins. Thus, for example,
|
|
|
|
* tuple tables must be cleared or dropped to ensure pins are released.
|
1996-07-09 08:22:35 +02:00
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
2007-02-27 02:11:26 +01:00
|
|
|
static void
|
2003-08-08 23:42:59 +02:00
|
|
|
ExecEndPlan(PlanState *planstate, EState *estate)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-11-12 01:37:02 +01:00
|
|
|
ResultRelInfo *resultRelInfo;
|
|
|
|
int i;
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2000-06-10 07:16:38 +02:00
|
|
|
* shut down the node-type-specific query processing
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2002-12-05 16:50:39 +01:00
|
|
|
ExecEndNode(planstate);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2007-02-27 02:11:26 +01:00
|
|
|
/*
|
|
|
|
* for subplans too
|
|
|
|
*/
|
|
|
|
foreach(l, estate->es_subplanstates)
|
|
|
|
{
|
2007-11-15 22:14:46 +01:00
|
|
|
PlanState *subplanstate = (PlanState *) lfirst(l);
|
2007-02-27 02:11:26 +01:00
|
|
|
|
|
|
|
ExecEndNode(subplanstate);
|
|
|
|
}
|
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2009-09-27 22:09:58 +02:00
|
|
|
* destroy the executor's tuple table. Actually we only care about
|
2010-02-26 03:01:40 +01:00
|
|
|
* releasing buffer pins and tupdesc refcounts; there's no need to pfree
|
|
|
|
* the TupleTableSlots, since the containing memory context is about to go
|
|
|
|
* away anyway.
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2009-09-27 22:09:58 +02:00
|
|
|
ExecResetTupleTable(estate->es_tupleTable, false);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* close the result relation(s) if any, but hold locks until xact commit.
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2000-11-12 01:37:02 +01:00
|
|
|
resultRelInfo = estate->es_result_relations;
|
|
|
|
for (i = estate->es_num_result_relations; i > 0; i--)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2000-11-12 01:37:02 +01:00
|
|
|
/* Close indices and then the relation itself */
|
|
|
|
ExecCloseIndices(resultRelInfo);
|
|
|
|
heap_close(resultRelInfo->ri_RelationDesc, NoLock);
|
|
|
|
resultRelInfo++;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
|
2017-05-01 14:23:01 +02:00
|
|
|
/* Close the root target relation(s). */
|
|
|
|
resultRelInfo = estate->es_root_result_relations;
|
|
|
|
for (i = estate->es_num_root_result_relations; i > 0; i--)
|
|
|
|
{
|
|
|
|
heap_close(resultRelInfo->ri_RelationDesc, NoLock);
|
|
|
|
resultRelInfo++;
|
|
|
|
}
|
|
|
|
|
2017-05-16 18:46:32 +02:00
|
|
|
/* likewise close any trigger target relations */
|
|
|
|
ExecCleanUpTriggerState(estate);
|
2007-08-15 23:39:50 +02:00
|
|
|
|
2000-02-03 01:02:58 +01:00
|
|
|
/*
|
2013-05-29 22:58:43 +02:00
|
|
|
* close any relations selected FOR [KEY] UPDATE/SHARE, again keeping
|
|
|
|
* locks
|
2000-02-03 01:02:58 +01:00
|
|
|
*/
|
2005-08-01 22:31:16 +02:00
|
|
|
foreach(l, estate->es_rowMarks)
|
2000-02-03 01:02:58 +01:00
|
|
|
{
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
ExecRowMark *erm = (ExecRowMark *) lfirst(l);
|
2000-02-03 01:02:58 +01:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
if (erm->relation)
|
|
|
|
heap_close(erm->relation, NoLock);
|
2000-02-03 01:02:58 +01:00
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
|
1997-09-07 07:04:48 +02:00
|
|
|
* ExecutePlan
|
|
|
|
*
|
2013-01-25 00:34:00 +01:00
|
|
|
* Processes the query plan until we have retrieved 'numberTuples' tuples,
|
2008-10-31 22:07:55 +01:00
|
|
|
* moving in the specified direction.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
2008-10-31 22:07:55 +01:00
|
|
|
* Runs to completion if numberTuples is 0
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2000-10-26 23:38:24 +02:00
|
|
|
* Note: the ctid attribute is a 'junk' attribute that is removed before the
|
|
|
|
* user can see it
|
1996-07-09 08:22:35 +02:00
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
2008-10-31 22:07:55 +01:00
|
|
|
static void
|
1997-09-08 23:56:23 +02:00
|
|
|
ExecutePlan(EState *estate,
|
2003-08-08 23:42:59 +02:00
|
|
|
PlanState *planstate,
|
2015-10-16 17:56:02 +02:00
|
|
|
bool use_parallel_mode,
|
1997-09-07 07:04:48 +02:00
|
|
|
CmdType operation,
|
2009-10-10 03:43:50 +02:00
|
|
|
bool sendTuples,
|
Widen query numbers-of-tuples-processed counters to uint64.
This patch widens SPI_processed, EState's es_processed field, PortalData's
portalPos field, FuncCallContext's call_cntr and max_calls fields,
ExecutorRun's count argument, PortalRunFetch's result, and the max number
of rows in a SPITupleTable to uint64, and deals with (I hope) all the
ensuing fallout. Some of these values were declared uint32 before, and
others "long".
I also removed PortalData's posOverflow field, since that logic seems
pretty useless given that portalPos is now always 64 bits.
The user-visible results are that command tags for SELECT etc will
correctly report tuple counts larger than 4G, as will plpgsql's GET
GET DIAGNOSTICS ... ROW_COUNT command. Queries processing more tuples
than that are still not exactly the norm, but they're becoming more
common.
Most values associated with FETCH/MOVE distances, such as PortalRun's count
argument and the count argument of most SPI functions that have one, remain
declared as "long". It's not clear whether it would be worth promoting
those to int64; but it would definitely be a large dollop of additional
API churn on top of this, and it would only help 32-bit platforms which
seem relatively less likely to see any benefit.
Andreas Scherbaum, reviewed by Christian Ullrich, additional hacking by me
2016-03-12 22:05:10 +01:00
|
|
|
uint64 numberTuples,
|
1997-09-07 07:04:48 +02:00
|
|
|
ScanDirection direction,
|
2017-03-23 18:05:48 +01:00
|
|
|
DestReceiver *dest,
|
|
|
|
bool execute_once)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2003-08-04 02:43:34 +02:00
|
|
|
TupleTableSlot *slot;
|
Widen query numbers-of-tuples-processed counters to uint64.
This patch widens SPI_processed, EState's es_processed field, PortalData's
portalPos field, FuncCallContext's call_cntr and max_calls fields,
ExecutorRun's count argument, PortalRunFetch's result, and the max number
of rows in a SPITupleTable to uint64, and deals with (I hope) all the
ensuing fallout. Some of these values were declared uint32 before, and
others "long".
I also removed PortalData's posOverflow field, since that logic seems
pretty useless given that portalPos is now always 64 bits.
The user-visible results are that command tags for SELECT etc will
correctly report tuple counts larger than 4G, as will plpgsql's GET
GET DIAGNOSTICS ... ROW_COUNT command. Queries processing more tuples
than that are still not exactly the norm, but they're becoming more
common.
Most values associated with FETCH/MOVE distances, such as PortalRun's count
argument and the count argument of most SPI functions that have one, remain
declared as "long". It's not clear whether it would be worth promoting
those to int64; but it would definitely be a large dollop of additional
API churn on top of this, and it would only help 32-bit platforms which
seem relatively less likely to see any benefit.
Andreas Scherbaum, reviewed by Christian Ullrich, additional hacking by me
2016-03-12 22:05:10 +01:00
|
|
|
uint64 current_tuple_count;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
1999-05-25 18:15:34 +02:00
|
|
|
* initialize local variables
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
current_tuple_count = 0;
|
|
|
|
|
1999-05-25 18:15:34 +02:00
|
|
|
/*
|
|
|
|
* Set the direction.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
estate->es_direction = direction;
|
|
|
|
|
2015-10-16 17:56:02 +02:00
|
|
|
/*
|
2017-03-23 18:05:48 +01:00
|
|
|
* If the plan might potentially be executed multiple times, we must force
|
2017-10-05 17:34:38 +02:00
|
|
|
* it to run without parallelism, because we might exit early.
|
2015-10-16 17:56:02 +02:00
|
|
|
*/
|
2017-10-05 17:34:38 +02:00
|
|
|
if (!execute_once)
|
2015-10-16 17:56:02 +02:00
|
|
|
use_parallel_mode = false;
|
|
|
|
|
2017-10-27 16:04:01 +02:00
|
|
|
estate->es_use_parallel_mode = use_parallel_mode;
|
2015-10-16 17:56:02 +02:00
|
|
|
if (use_parallel_mode)
|
|
|
|
EnterParallelMode();
|
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Loop until we've processed the proper number of tuples from the plan.
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
|
|
|
for (;;)
|
|
|
|
{
|
2001-01-22 01:50:07 +01:00
|
|
|
/* Reset the per-output-tuple exprcontext */
|
|
|
|
ResetPerTupleExprContext(estate);
|
1999-05-25 18:15:34 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
1999-05-25 18:15:34 +02:00
|
|
|
* Execute the plan and obtain a tuple
|
1998-10-01 04:04:01 +02:00
|
|
|
*/
|
2009-10-12 20:10:51 +02:00
|
|
|
slot = ExecProcNode(planstate);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
1999-05-25 18:15:34 +02:00
|
|
|
* if the tuple is null, then we assume there is nothing more to
|
2008-10-31 22:07:55 +01:00
|
|
|
* process so we just end the loop...
|
1998-10-01 04:04:01 +02:00
|
|
|
*/
|
2009-10-12 20:10:51 +02:00
|
|
|
if (TupIsNull(slot))
|
2015-10-16 17:56:02 +02:00
|
|
|
{
|
|
|
|
/* Allow nodes to release or shut down resources. */
|
|
|
|
(void) ExecShutdownNode(planstate);
|
1998-10-01 04:04:01 +02:00
|
|
|
break;
|
2015-10-16 17:56:02 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2008-04-21 05:49:45 +02:00
|
|
|
* If we have a junk filter, then project a new tuple with the junk
|
2005-10-15 04:49:52 +02:00
|
|
|
* removed.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
2001-05-27 22:48:51 +02:00
|
|
|
* Store this new "clean" tuple in the junkfilter's resultSlot.
|
2005-10-15 04:49:52 +02:00
|
|
|
* (Formerly, we stored it back over the "dirty" tuple, which is WRONG
|
|
|
|
* because that tuple slot has the wrong descriptor.)
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2009-10-12 20:10:51 +02:00
|
|
|
if (estate->es_junkFilter != NULL)
|
|
|
|
slot = ExecFilterJunk(estate->es_junkFilter, slot);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* If we are supposed to send the tuple somewhere, do so. (In
|
|
|
|
* practice, this is probably always the case at this point.)
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2009-10-10 03:43:50 +02:00
|
|
|
if (sendTuples)
|
2016-06-06 20:52:58 +02:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If we are not able to send the tuple, we assume the destination
|
|
|
|
* has closed and no more tuples can be sent. If that's the case,
|
|
|
|
* end the loop.
|
|
|
|
*/
|
2017-09-07 18:06:23 +02:00
|
|
|
if (!dest->receiveSlot(slot, dest))
|
2016-06-06 20:52:58 +02:00
|
|
|
break;
|
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2009-10-10 03:43:50 +02:00
|
|
|
/*
|
|
|
|
* Count tuples processed, if this is a SELECT. (For other operation
|
|
|
|
* types, the ModifyTable plan node must count the appropriate
|
|
|
|
* events.)
|
|
|
|
*/
|
|
|
|
if (operation == CMD_SELECT)
|
|
|
|
(estate->es_processed)++;
|
1999-05-25 18:15:34 +02:00
|
|
|
|
1999-02-22 20:40:10 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* check our tuple count.. if we've processed the proper number then
|
|
|
|
* quit, else loop again and process more tuples. Zero numberTuples
|
|
|
|
* means no limit.
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2000-10-26 23:38:24 +02:00
|
|
|
current_tuple_count++;
|
2003-01-09 00:32:29 +01:00
|
|
|
if (numberTuples && numberTuples == current_tuple_count)
|
2017-03-23 18:05:48 +01:00
|
|
|
{
|
|
|
|
/* Allow nodes to release or shut down resources. */
|
|
|
|
(void) ExecShutdownNode(planstate);
|
1997-09-07 07:04:48 +02:00
|
|
|
break;
|
2017-03-23 18:05:48 +01:00
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2015-10-16 17:56:02 +02:00
|
|
|
|
|
|
|
if (use_parallel_mode)
|
|
|
|
ExitParallelMode();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-08-22 05:12:19 +02:00
|
|
|
|
2006-08-12 04:52:06 +02:00
|
|
|
/*
|
|
|
|
* ExecRelCheck --- check that tuple meets constraints for result relation
|
Provide database object names as separate fields in error messages.
This patch addresses the problem that applications currently have to
extract object names from possibly-localized textual error messages,
if they want to know for example which index caused a UNIQUE_VIOLATION
failure. It adds new error message fields to the wire protocol, which
can carry the name of a table, table column, data type, or constraint
associated with the error. (Since the protocol spec has always instructed
clients to ignore unrecognized field types, this should not create any
compatibility problem.)
Support for providing these new fields has been added to just a limited set
of error reports (mainly, those in the "integrity constraint violation"
SQLSTATE class), but we will doubtless add them to more calls in future.
Pavel Stehule, reviewed and extensively revised by Peter Geoghegan, with
additional hacking by Tom Lane.
2013-01-29 23:06:26 +01:00
|
|
|
*
|
|
|
|
* Returns NULL if OK, else name of failed check constraint
|
2006-08-12 04:52:06 +02:00
|
|
|
*/
|
2003-07-21 19:05:12 +02:00
|
|
|
static const char *
|
2000-11-12 01:37:02 +01:00
|
|
|
ExecRelCheck(ResultRelInfo *resultRelInfo,
|
|
|
|
TupleTableSlot *slot, EState *estate)
|
1997-08-22 16:28:20 +02:00
|
|
|
{
|
2000-11-12 01:37:02 +01:00
|
|
|
Relation rel = resultRelInfo->ri_RelationDesc;
|
1997-09-08 04:41:22 +02:00
|
|
|
int ncheck = rel->rd_att->constr->num_check;
|
|
|
|
ConstrCheck *check = rel->rd_att->constr->check;
|
2000-08-06 06:26:40 +02:00
|
|
|
ExprContext *econtext;
|
2000-08-22 06:06:22 +02:00
|
|
|
MemoryContext oldContext;
|
1997-09-08 04:41:22 +02:00
|
|
|
int i;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-11-12 01:37:02 +01:00
|
|
|
/*
|
|
|
|
* If first time through for this result relation, build expression
|
2005-10-15 04:49:52 +02:00
|
|
|
* nodetrees for rel's constraint expressions. Keep them in the per-query
|
|
|
|
* memory context so they'll survive throughout the query.
|
2000-11-12 01:37:02 +01:00
|
|
|
*/
|
|
|
|
if (resultRelInfo->ri_ConstraintExprs == NULL)
|
|
|
|
{
|
|
|
|
oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
|
|
resultRelInfo->ri_ConstraintExprs =
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure for to later reduce the per executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer. The behavior
around might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
|
|
|
(ExprState **) palloc(ncheck * sizeof(ExprState *));
|
2000-11-12 01:37:02 +01:00
|
|
|
for (i = 0; i < ncheck; i++)
|
|
|
|
{
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure for to later reduce the per executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer. The behavior
around might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
|
|
|
Expr *checkconstr;
|
|
|
|
|
|
|
|
checkconstr = stringToNode(check[i].ccbin);
|
|
|
|
resultRelInfo->ri_ConstraintExprs[i] =
|
|
|
|
ExecPrepareExpr(checkconstr, estate);
|
2000-11-12 01:37:02 +01:00
|
|
|
}
|
|
|
|
MemoryContextSwitchTo(oldContext);
|
|
|
|
}
|
|
|
|
|
2000-08-06 06:26:40 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* We will use the EState's per-tuple context for evaluating constraint
|
|
|
|
* expressions (creating it if it's not already there).
|
2000-08-06 06:26:40 +02:00
|
|
|
*/
|
2001-01-22 01:50:07 +01:00
|
|
|
econtext = GetPerTupleExprContext(estate);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-08-06 06:26:40 +02:00
|
|
|
/* Arrange for econtext's scan tuple to be the tuple under test */
|
|
|
|
econtext->ecxt_scantuple = slot;
|
|
|
|
|
|
|
|
/* And evaluate the constraints */
|
1997-09-07 07:04:48 +02:00
|
|
|
for (i = 0; i < ncheck; i++)
|
|
|
|
{
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure for to later reduce the per executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer. The behavior
around might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
|
|
|
ExprState *checkconstr = resultRelInfo->ri_ConstraintExprs[i];
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-01-20 00:55:03 +01:00
|
|
|
/*
|
2013-05-29 22:58:43 +02:00
|
|
|
* NOTE: SQL specifies that a NULL result from a constraint expression
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure for to later reduce the per executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer. The behavior
around might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
|
|
|
* is not to be treated as a failure. Therefore, use ExecCheck not
|
|
|
|
* ExecQual.
|
2000-01-20 00:55:03 +01:00
|
|
|
*/
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure for to later reduce the per executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer. The behavior
around might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
|
|
|
if (!ExecCheck(checkconstr, econtext))
|
1998-09-01 05:29:17 +02:00
|
|
|
return check[i].ccname;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
|
2000-08-06 06:26:40 +02:00
|
|
|
/* NULL result means no error */
|
2003-07-21 19:05:12 +02:00
|
|
|
return NULL;
|
1997-08-22 16:28:20 +02:00
|
|
|
}
|
|
|
|
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
/*
|
|
|
|
* ExecPartitionCheck --- check that tuple meets the partition constraint.
|
2017-11-15 16:23:28 +01:00
|
|
|
*
|
2018-06-11 22:53:33 +02:00
|
|
|
* Returns true if it meets the partition constraint. If the constraint
|
|
|
|
* fails and we're asked to emit to error, do so and don't return; otherwise
|
|
|
|
* return false.
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
*/
|
2018-01-05 21:18:03 +01:00
|
|
|
bool
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
|
2018-06-11 22:53:33 +02:00
|
|
|
EState *estate, bool emitError)
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
{
|
|
|
|
ExprContext *econtext;
|
2018-06-30 18:25:49 +02:00
|
|
|
bool success;
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If first time through, build expression state tree for the partition
|
|
|
|
* check expression. Keep it in the per-query memory context so they'll
|
|
|
|
* survive throughout the query.
|
|
|
|
*/
|
|
|
|
if (resultRelInfo->ri_PartitionCheckExpr == NULL)
|
|
|
|
{
|
2017-01-24 16:20:02 +01:00
|
|
|
List *qual = resultRelInfo->ri_PartitionCheck;
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure for to later reduce the per executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer. The behavior
around might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
|
|
|
resultRelInfo->ri_PartitionCheckExpr = ExecPrepareCheck(qual, estate);
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We will use the EState's per-tuple context for evaluating constraint
|
|
|
|
* expressions (creating it if it's not already there).
|
|
|
|
*/
|
|
|
|
econtext = GetPerTupleExprContext(estate);
|
|
|
|
|
|
|
|
/* Arrange for econtext's scan tuple to be the tuple under test */
|
|
|
|
econtext->ecxt_scantuple = slot;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* As in case of the catalogued constraints, we treat a NULL result as
|
|
|
|
* success here, not a failure.
|
|
|
|
*/
|
2018-06-11 22:53:33 +02:00
|
|
|
success = ExecCheck(resultRelInfo->ri_PartitionCheckExpr, econtext);
|
|
|
|
|
|
|
|
/* if asked to emit error, don't actually return on failure */
|
|
|
|
if (!success && emitError)
|
|
|
|
ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
|
|
|
|
|
|
|
|
return success;
|
2018-01-05 21:18:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExecPartitionCheckEmitError - Form and emit an error message after a failed
|
|
|
|
* partition constraint check.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ExecPartitionCheckEmitError(ResultRelInfo *resultRelInfo,
|
|
|
|
TupleTableSlot *slot,
|
|
|
|
EState *estate)
|
|
|
|
{
|
|
|
|
Relation rel = resultRelInfo->ri_RelationDesc;
|
|
|
|
Relation orig_rel = rel;
|
|
|
|
TupleDesc tupdesc = RelationGetDescr(rel);
|
|
|
|
char *val_desc;
|
|
|
|
Bitmapset *modifiedCols;
|
|
|
|
Bitmapset *insertedCols;
|
|
|
|
Bitmapset *updatedCols;
|
2017-06-07 18:45:32 +02:00
|
|
|
|
2018-01-05 21:18:03 +01:00
|
|
|
/*
|
|
|
|
* Need to first convert the tuple to the root partitioned table's row
|
|
|
|
* type. For details, check similar comments in ExecConstraints().
|
|
|
|
*/
|
|
|
|
if (resultRelInfo->ri_PartitionRoot)
|
|
|
|
{
|
|
|
|
HeapTuple tuple = ExecFetchSlotTuple(slot);
|
|
|
|
TupleDesc old_tupdesc = RelationGetDescr(rel);
|
|
|
|
TupleConversionMap *map;
|
|
|
|
|
|
|
|
rel = resultRelInfo->ri_PartitionRoot;
|
|
|
|
tupdesc = RelationGetDescr(rel);
|
|
|
|
/* a reverse map */
|
|
|
|
map = convert_tuples_by_name(old_tupdesc, tupdesc,
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
if (map != NULL)
|
2017-06-07 18:45:32 +02:00
|
|
|
{
|
2018-01-05 21:18:03 +01:00
|
|
|
tuple = do_convert_tuple(tuple, map);
|
|
|
|
ExecSetSlotDescriptor(slot, tupdesc);
|
|
|
|
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
|
2017-06-07 18:45:32 +02:00
|
|
|
}
|
|
|
|
}
|
2018-01-05 21:18:03 +01:00
|
|
|
|
|
|
|
insertedCols = GetInsertedColumns(resultRelInfo, estate);
|
|
|
|
updatedCols = GetUpdatedColumns(resultRelInfo, estate);
|
|
|
|
modifiedCols = bms_union(insertedCols, updatedCols);
|
|
|
|
val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
|
|
|
|
slot,
|
|
|
|
tupdesc,
|
|
|
|
modifiedCols,
|
|
|
|
64);
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CHECK_VIOLATION),
|
|
|
|
errmsg("new row for relation \"%s\" violates partition constraint",
|
|
|
|
RelationGetRelationName(orig_rel)),
|
|
|
|
val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
}
|
|
|
|
|
2017-01-04 20:36:34 +01:00
|
|
|
/*
|
|
|
|
* ExecConstraints - check constraints of the tuple in 'slot'
|
|
|
|
*
|
2018-06-11 22:53:33 +02:00
|
|
|
* This checks the traditional NOT NULL and check constraints.
|
|
|
|
*
|
|
|
|
* The partition constraint is *NOT* checked.
|
2017-01-04 20:36:34 +01:00
|
|
|
*
|
|
|
|
* Note: 'slot' contains the tuple to check the constraints of, which may
|
2017-04-10 18:20:08 +02:00
|
|
|
* have been converted from the original input tuple after tuple routing.
|
2018-06-11 22:53:33 +02:00
|
|
|
* 'resultRelInfo' is the final result relation, after tuple routing.
|
2017-01-04 20:36:34 +01:00
|
|
|
*/
|
1998-11-27 20:52:36 +01:00
|
|
|
void
|
2003-07-21 19:05:12 +02:00
|
|
|
ExecConstraints(ResultRelInfo *resultRelInfo,
|
2018-06-11 22:53:33 +02:00
|
|
|
TupleTableSlot *slot, EState *estate)
|
1997-08-22 16:28:20 +02:00
|
|
|
{
|
2000-11-12 01:37:02 +01:00
|
|
|
Relation rel = resultRelInfo->ri_RelationDesc;
|
2013-11-07 20:41:36 +01:00
|
|
|
TupleDesc tupdesc = RelationGetDescr(rel);
|
|
|
|
TupleConstr *constr = tupdesc->constr;
|
2015-05-24 03:35:49 +02:00
|
|
|
Bitmapset *modifiedCols;
|
|
|
|
Bitmapset *insertedCols;
|
|
|
|
Bitmapset *updatedCols;
|
2000-08-06 06:26:40 +02:00
|
|
|
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
Assert(constr || resultRelInfo->ri_PartitionCheck);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
if (constr && constr->has_not_null)
|
1997-08-22 16:28:20 +02:00
|
|
|
{
|
2013-11-07 20:41:36 +01:00
|
|
|
int natts = tupdesc->natts;
|
1997-09-08 04:41:22 +02:00
|
|
|
int attrChk;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-08-06 06:26:40 +02:00
|
|
|
for (attrChk = 1; attrChk <= natts; attrChk++)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2017-08-20 20:19:07 +02:00
|
|
|
Form_pg_attribute att = TupleDescAttr(tupdesc, attrChk - 1);
|
|
|
|
|
|
|
|
if (att->attnotnull && slot_attisnull(slot, attrChk))
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
{
|
|
|
|
char *val_desc;
|
2017-01-04 20:36:34 +01:00
|
|
|
Relation orig_rel = rel;
|
2017-04-10 18:20:08 +02:00
|
|
|
TupleDesc orig_tupdesc = RelationGetDescr(rel);
|
2017-01-04 20:36:34 +01:00
|
|
|
|
|
|
|
/*
|
2017-04-10 18:20:08 +02:00
|
|
|
* If the tuple has been routed, it's been converted to the
|
|
|
|
* partition's rowtype, which might differ from the root
|
|
|
|
* table's. We must convert it back to the root table's
|
|
|
|
* rowtype so that val_desc shown error message matches the
|
|
|
|
* input tuple.
|
2017-01-04 20:36:34 +01:00
|
|
|
*/
|
|
|
|
if (resultRelInfo->ri_PartitionRoot)
|
|
|
|
{
|
2017-04-10 18:20:08 +02:00
|
|
|
HeapTuple tuple = ExecFetchSlotTuple(slot);
|
2017-05-17 22:31:56 +02:00
|
|
|
TupleConversionMap *map;
|
2017-04-10 18:20:08 +02:00
|
|
|
|
2017-01-04 20:36:34 +01:00
|
|
|
rel = resultRelInfo->ri_PartitionRoot;
|
|
|
|
tupdesc = RelationGetDescr(rel);
|
2017-04-10 18:20:08 +02:00
|
|
|
/* a reverse map */
|
|
|
|
map = convert_tuples_by_name(orig_tupdesc, tupdesc,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
gettext_noop("could not convert row type"));
|
2017-04-10 18:20:08 +02:00
|
|
|
if (map != NULL)
|
|
|
|
{
|
|
|
|
tuple = do_convert_tuple(tuple, map);
|
2017-07-25 00:08:08 +02:00
|
|
|
ExecSetSlotDescriptor(slot, tupdesc);
|
2017-04-10 18:20:08 +02:00
|
|
|
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
|
|
|
|
}
|
2017-01-04 20:36:34 +01:00
|
|
|
}
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
|
2015-05-08 00:20:46 +02:00
|
|
|
insertedCols = GetInsertedColumns(resultRelInfo, estate);
|
|
|
|
updatedCols = GetUpdatedColumns(resultRelInfo, estate);
|
|
|
|
modifiedCols = bms_union(insertedCols, updatedCols);
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
|
2017-04-10 18:20:08 +02:00
|
|
|
slot,
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
tupdesc,
|
|
|
|
modifiedCols,
|
|
|
|
64);
|
|
|
|
|
2003-07-21 19:05:12 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_NOT_NULL_VIOLATION),
|
2003-09-25 08:58:07 +02:00
|
|
|
errmsg("null value in column \"%s\" violates not-null constraint",
|
2017-08-20 20:19:07 +02:00
|
|
|
NameStr(att->attname)),
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
|
2017-01-04 20:36:34 +01:00
|
|
|
errtablecol(orig_rel, attrChk)));
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
if (constr && constr->num_check > 0)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2003-08-04 02:43:34 +02:00
|
|
|
const char *failed;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-11-12 01:37:02 +01:00
|
|
|
if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
{
|
|
|
|
char *val_desc;
|
2017-01-04 20:36:34 +01:00
|
|
|
Relation orig_rel = rel;
|
|
|
|
|
|
|
|
/* See the comment above. */
|
|
|
|
if (resultRelInfo->ri_PartitionRoot)
|
|
|
|
{
|
2017-04-10 18:20:08 +02:00
|
|
|
HeapTuple tuple = ExecFetchSlotTuple(slot);
|
|
|
|
TupleDesc old_tupdesc = RelationGetDescr(rel);
|
2017-05-17 22:31:56 +02:00
|
|
|
TupleConversionMap *map;
|
2017-04-10 18:20:08 +02:00
|
|
|
|
2017-01-04 20:36:34 +01:00
|
|
|
rel = resultRelInfo->ri_PartitionRoot;
|
|
|
|
tupdesc = RelationGetDescr(rel);
|
2017-04-10 18:20:08 +02:00
|
|
|
/* a reverse map */
|
|
|
|
map = convert_tuples_by_name(old_tupdesc, tupdesc,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
gettext_noop("could not convert row type"));
|
2017-04-10 18:20:08 +02:00
|
|
|
if (map != NULL)
|
|
|
|
{
|
|
|
|
tuple = do_convert_tuple(tuple, map);
|
2017-07-25 00:08:08 +02:00
|
|
|
ExecSetSlotDescriptor(slot, tupdesc);
|
2017-04-10 18:20:08 +02:00
|
|
|
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
|
|
|
|
}
|
2017-01-04 20:36:34 +01:00
|
|
|
}
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
|
2015-05-08 00:20:46 +02:00
|
|
|
insertedCols = GetInsertedColumns(resultRelInfo, estate);
|
|
|
|
updatedCols = GetUpdatedColumns(resultRelInfo, estate);
|
|
|
|
modifiedCols = bms_union(insertedCols, updatedCols);
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
|
2017-04-10 18:20:08 +02:00
|
|
|
slot,
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
tupdesc,
|
|
|
|
modifiedCols,
|
|
|
|
64);
|
2003-07-21 19:05:12 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CHECK_VIOLATION),
|
2003-09-25 08:58:07 +02:00
|
|
|
errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
|
2017-01-04 20:36:34 +01:00
|
|
|
RelationGetRelationName(orig_rel), failed),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
|
2017-01-04 20:36:34 +01:00
|
|
|
errtableconstraint(orig_rel, failed)));
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
1997-08-22 05:12:19 +02:00
|
|
|
}
|
1999-01-29 10:23:17 +01:00
|
|
|
|
2013-07-18 23:10:16 +02:00
|
|
|
/*
|
|
|
|
* ExecWithCheckOptions -- check that tuple satisfies any WITH CHECK OPTIONs
|
2015-04-25 02:34:26 +02:00
|
|
|
* of the specified kind.
|
|
|
|
*
|
|
|
|
* Note that this needs to be called multiple times to ensure that all kinds of
|
|
|
|
* WITH CHECK OPTIONs are handled (both those from views which have the WITH
|
|
|
|
* CHECK OPTION set and from row level security policies). See ExecInsert()
|
|
|
|
* and ExecUpdate().
|
2013-07-18 23:10:16 +02:00
|
|
|
*/
|
|
|
|
void
|
2015-04-25 02:34:26 +02:00
|
|
|
ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
|
2013-07-18 23:10:16 +02:00
|
|
|
TupleTableSlot *slot, EState *estate)
|
|
|
|
{
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
Relation rel = resultRelInfo->ri_RelationDesc;
|
|
|
|
TupleDesc tupdesc = RelationGetDescr(rel);
|
2013-07-18 23:10:16 +02:00
|
|
|
ExprContext *econtext;
|
2014-05-06 18:12:18 +02:00
|
|
|
ListCell *l1,
|
|
|
|
*l2;
|
2013-07-18 23:10:16 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We will use the EState's per-tuple context for evaluating constraint
|
|
|
|
* expressions (creating it if it's not already there).
|
|
|
|
*/
|
|
|
|
econtext = GetPerTupleExprContext(estate);
|
|
|
|
|
|
|
|
/* Arrange for econtext's scan tuple to be the tuple under test */
|
|
|
|
econtext->ecxt_scantuple = slot;
|
|
|
|
|
|
|
|
/* Check each of the constraints */
|
|
|
|
forboth(l1, resultRelInfo->ri_WithCheckOptions,
|
|
|
|
l2, resultRelInfo->ri_WithCheckOptionExprs)
|
|
|
|
{
|
|
|
|
WithCheckOption *wco = (WithCheckOption *) lfirst(l1);
|
2014-05-06 18:12:18 +02:00
|
|
|
ExprState *wcoExpr = (ExprState *) lfirst(l2);
|
2013-07-18 23:10:16 +02:00
|
|
|
|
2015-04-25 02:34:26 +02:00
|
|
|
/*
|
|
|
|
* Skip any WCOs which are not the kind we are looking for at this
|
|
|
|
* time.
|
|
|
|
*/
|
|
|
|
if (wco->kind != kind)
|
|
|
|
continue;
|
|
|
|
|
2013-07-18 23:10:16 +02:00
|
|
|
/*
|
|
|
|
* WITH CHECK OPTION checks are intended to ensure that the new tuple
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
* is visible (in the case of a view) or that it passes the
|
2015-05-24 03:35:49 +02:00
|
|
|
* 'with-check' policy (in the case of row security). If the qual
|
|
|
|
* evaluates to NULL or FALSE, then the new tuple won't be included in
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure to later reduce the per-executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer.  This behavior
might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
|
|
|
* the view or doesn't pass the 'with-check' policy for the table.
|
2013-07-18 23:10:16 +02:00
|
|
|
*/
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure to later reduce the per-executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer.  This behavior
might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
|
|
|
if (!ExecQual(wcoExpr, econtext))
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
{
|
|
|
|
char *val_desc;
|
|
|
|
Bitmapset *modifiedCols;
|
2015-05-08 00:20:46 +02:00
|
|
|
Bitmapset *insertedCols;
|
|
|
|
Bitmapset *updatedCols;
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
|
2015-04-25 02:34:26 +02:00
|
|
|
switch (wco->kind)
|
|
|
|
{
|
2015-05-24 03:35:49 +02:00
|
|
|
/*
|
|
|
|
* For WITH CHECK OPTIONs coming from views, we might be
|
|
|
|
* able to provide the details on the row, depending on
|
|
|
|
* the permissions on the relation (that is, if the user
|
|
|
|
* could view it directly anyway). For RLS violations, we
|
|
|
|
* don't include the data since we don't know if the user
|
2017-06-21 10:55:07 +02:00
|
|
|
* should be able to view the tuple as that depends on the
|
|
|
|
* USING policy.
|
2015-05-24 03:35:49 +02:00
|
|
|
*/
|
2015-04-25 02:34:26 +02:00
|
|
|
case WCO_VIEW_CHECK:
|
2017-07-18 03:56:31 +02:00
|
|
|
/* See the comment in ExecConstraints(). */
|
|
|
|
if (resultRelInfo->ri_PartitionRoot)
|
|
|
|
{
|
|
|
|
HeapTuple tuple = ExecFetchSlotTuple(slot);
|
|
|
|
TupleDesc old_tupdesc = RelationGetDescr(rel);
|
|
|
|
TupleConversionMap *map;
|
|
|
|
|
|
|
|
rel = resultRelInfo->ri_PartitionRoot;
|
|
|
|
tupdesc = RelationGetDescr(rel);
|
|
|
|
/* a reverse map */
|
|
|
|
map = convert_tuples_by_name(old_tupdesc, tupdesc,
|
|
|
|
gettext_noop("could not convert row type"));
|
|
|
|
if (map != NULL)
|
|
|
|
{
|
|
|
|
tuple = do_convert_tuple(tuple, map);
|
2017-07-25 00:08:08 +02:00
|
|
|
ExecSetSlotDescriptor(slot, tupdesc);
|
2017-07-18 03:56:31 +02:00
|
|
|
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-08 00:20:46 +02:00
|
|
|
insertedCols = GetInsertedColumns(resultRelInfo, estate);
|
|
|
|
updatedCols = GetUpdatedColumns(resultRelInfo, estate);
|
|
|
|
modifiedCols = bms_union(insertedCols, updatedCols);
|
2015-04-25 02:34:26 +02:00
|
|
|
val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
|
|
|
|
slot,
|
|
|
|
tupdesc,
|
|
|
|
modifiedCols,
|
|
|
|
64);
|
|
|
|
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WITH_CHECK_OPTION_VIOLATION),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errmsg("new row violates check option for view \"%s\"",
|
|
|
|
wco->relname),
|
2015-04-25 02:34:26 +02:00
|
|
|
val_desc ? errdetail("Failing row contains %s.",
|
|
|
|
val_desc) : 0));
|
|
|
|
break;
|
|
|
|
case WCO_RLS_INSERT_CHECK:
|
|
|
|
case WCO_RLS_UPDATE_CHECK:
|
2015-09-15 21:49:31 +02:00
|
|
|
if (wco->polname != NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2016-06-10 00:02:36 +02:00
|
|
|
errmsg("new row violates row-level security policy \"%s\" for table \"%s\"",
|
|
|
|
wco->polname, wco->relname)));
|
2015-09-15 21:49:31 +02:00
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2016-06-10 00:02:36 +02:00
|
|
|
errmsg("new row violates row-level security policy for table \"%s\"",
|
|
|
|
wco->relname)));
|
2015-04-25 02:34:26 +02:00
|
|
|
break;
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using an
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
case WCO_RLS_CONFLICT_CHECK:
|
2015-09-15 21:49:31 +02:00
|
|
|
if (wco->polname != NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2016-06-10 00:02:36 +02:00
|
|
|
errmsg("new row violates row-level security policy \"%s\" (USING expression) for table \"%s\"",
|
|
|
|
wco->polname, wco->relname)));
|
2015-09-15 21:49:31 +02:00
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2016-06-10 00:02:36 +02:00
|
|
|
errmsg("new row violates row-level security policy (USING expression) for table \"%s\"",
|
|
|
|
wco->relname)));
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
break;
|
2015-04-25 02:34:26 +02:00
|
|
|
default:
|
|
|
|
elog(ERROR, "unrecognized WCO kind: %u", wco->kind);
|
|
|
|
break;
|
|
|
|
}
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
}
|
2013-07-18 23:10:16 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-11-29 21:02:10 +01:00
|
|
|
/*
|
|
|
|
* ExecBuildSlotValueDescription -- construct a string representing a tuple
|
|
|
|
*
|
|
|
|
* This is intentionally very similar to BuildIndexValueDescription, but
|
2013-11-07 20:41:36 +01:00
|
|
|
* unlike that function, we truncate long field values (to at most maxfieldlen
|
2014-05-06 18:12:18 +02:00
|
|
|
* bytes). That seems necessary here since heap field values could be very
|
2013-11-07 20:41:36 +01:00
|
|
|
* long, whereas index entries typically aren't so wide.
|
|
|
|
*
|
|
|
|
* Also, unlike the case with index entries, we need to be prepared to ignore
|
|
|
|
* dropped columns. We used to use the slot's tuple descriptor to decode the
|
|
|
|
* data, but the slot's descriptor doesn't identify dropped columns, so we
|
|
|
|
* now need to be passed the relation's descriptor.
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
*
|
|
|
|
* Note that, like BuildIndexValueDescription, if the user does not have
|
|
|
|
* permission to view any of the columns involved, a NULL is returned. Unlike
|
|
|
|
* BuildIndexValueDescription, if the user has access to view a subset of the
|
|
|
|
* column involved, that subset will be returned with a key identifying which
|
|
|
|
* columns they are.
|
2011-11-29 21:02:10 +01:00
|
|
|
*/
|
|
|
|
static char *
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
ExecBuildSlotValueDescription(Oid reloid,
|
|
|
|
TupleTableSlot *slot,
|
2013-11-07 20:41:36 +01:00
|
|
|
TupleDesc tupdesc,
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
Bitmapset *modifiedCols,
|
2013-11-07 20:41:36 +01:00
|
|
|
int maxfieldlen)
|
2011-11-29 21:02:10 +01:00
|
|
|
{
|
|
|
|
StringInfoData buf;
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
StringInfoData collist;
|
2013-11-07 20:41:36 +01:00
|
|
|
bool write_comma = false;
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
bool write_comma_collist = false;
|
2011-11-29 21:02:10 +01:00
|
|
|
int i;
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
AclResult aclresult;
|
|
|
|
bool table_perm = false;
|
|
|
|
bool any_perm = false;
|
2011-11-29 21:02:10 +01:00
|
|
|
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
/*
|
|
|
|
* Check if RLS is enabled and should be active for the relation; if so,
|
|
|
|
* then don't return anything. Otherwise, go through normal permission
|
|
|
|
* checks.
|
|
|
|
*/
|
2015-07-28 22:21:22 +02:00
|
|
|
if (check_enable_rls(reloid, InvalidOid, true) == RLS_ENABLED)
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
return NULL;
|
2011-11-29 21:02:10 +01:00
|
|
|
|
|
|
|
initStringInfo(&buf);
|
|
|
|
|
|
|
|
appendStringInfoChar(&buf, '(');
|
|
|
|
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
/*
|
|
|
|
* Check if the user has permissions to see the row. Table-level SELECT
|
|
|
|
* allows access to all columns. If the user does not have table-level
|
|
|
|
* SELECT then we check each column and include those the user has SELECT
|
|
|
|
* rights on. Additionally, we always include columns the user provided
|
|
|
|
* data for.
|
|
|
|
*/
|
|
|
|
aclresult = pg_class_aclcheck(reloid, GetUserId(), ACL_SELECT);
|
|
|
|
if (aclresult != ACLCHECK_OK)
|
|
|
|
{
|
|
|
|
/* Set up the buffer for the column list */
|
|
|
|
initStringInfo(&collist);
|
|
|
|
appendStringInfoChar(&collist, '(');
|
|
|
|
}
|
|
|
|
else
|
|
|
|
table_perm = any_perm = true;
|
|
|
|
|
|
|
|
/* Make sure the tuple is fully deconstructed */
|
|
|
|
slot_getallattrs(slot);
|
|
|
|
|
2011-11-29 21:02:10 +01:00
|
|
|
for (i = 0; i < tupdesc->natts; i++)
|
|
|
|
{
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
bool column_perm = false;
|
2011-11-29 21:02:10 +01:00
|
|
|
char *val;
|
|
|
|
int vallen;
|
2017-08-20 20:19:07 +02:00
|
|
|
Form_pg_attribute att = TupleDescAttr(tupdesc, i);
|
2011-11-29 21:02:10 +01:00
|
|
|
|
2013-11-07 20:41:36 +01:00
|
|
|
/* ignore dropped columns */
|
2017-08-20 20:19:07 +02:00
|
|
|
if (att->attisdropped)
|
2013-11-07 20:41:36 +01:00
|
|
|
continue;
|
|
|
|
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
if (!table_perm)
|
2011-11-29 21:02:10 +01:00
|
|
|
{
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
/*
|
|
|
|
* No table-level SELECT, so need to make sure they either have
|
2015-05-24 03:35:49 +02:00
|
|
|
* SELECT rights on the column or that they have provided the data
|
|
|
|
* for the column. If not, omit this column from the error
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
* message.
|
|
|
|
*/
|
2017-08-20 20:19:07 +02:00
|
|
|
aclresult = pg_attribute_aclcheck(reloid, att->attnum,
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
GetUserId(), ACL_SELECT);
|
2017-08-20 20:19:07 +02:00
|
|
|
if (bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
modifiedCols) || aclresult == ACLCHECK_OK)
|
|
|
|
{
|
|
|
|
column_perm = any_perm = true;
|
2011-11-29 21:02:10 +01:00
|
|
|
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
if (write_comma_collist)
|
|
|
|
appendStringInfoString(&collist, ", ");
|
|
|
|
else
|
|
|
|
write_comma_collist = true;
|
2011-11-29 21:02:10 +01:00
|
|
|
|
2017-08-20 20:19:07 +02:00
|
|
|
appendStringInfoString(&collist, NameStr(att->attname));
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
}
|
|
|
|
}
|
2011-11-29 21:02:10 +01:00
|
|
|
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
if (table_perm || column_perm)
|
2011-11-29 21:02:10 +01:00
|
|
|
{
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
if (slot->tts_isnull[i])
|
|
|
|
val = "null";
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Oid foutoid;
|
|
|
|
bool typisvarlena;
|
|
|
|
|
2017-08-20 20:19:07 +02:00
|
|
|
getTypeOutputInfo(att->atttypid,
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
&foutoid, &typisvarlena);
|
|
|
|
val = OidOutputFunctionCall(foutoid, slot->tts_values[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (write_comma)
|
|
|
|
appendStringInfoString(&buf, ", ");
|
|
|
|
else
|
|
|
|
write_comma = true;
|
|
|
|
|
|
|
|
/* truncate if needed */
|
|
|
|
vallen = strlen(val);
|
|
|
|
if (vallen <= maxfieldlen)
|
|
|
|
appendStringInfoString(&buf, val);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
vallen = pg_mbcliplen(val, vallen, maxfieldlen);
|
|
|
|
appendBinaryStringInfo(&buf, val, vallen);
|
|
|
|
appendStringInfoString(&buf, "...");
|
|
|
|
}
|
2011-11-29 21:02:10 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
/* If we end up with zero columns being returned, then return NULL. */
|
|
|
|
if (!any_perm)
|
|
|
|
return NULL;
|
|
|
|
|
2011-11-29 21:02:10 +01:00
|
|
|
appendStringInfoChar(&buf, ')');
|
|
|
|
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
if (!table_perm)
|
|
|
|
{
|
|
|
|
appendStringInfoString(&collist, ") = ");
|
|
|
|
appendStringInfoString(&collist, buf.data);
|
|
|
|
|
|
|
|
return collist.data;
|
|
|
|
}
|
|
|
|
|
2011-11-29 21:02:10 +01:00
|
|
|
return buf.data;
|
|
|
|
}
|
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
/*
|
|
|
|
* ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a
|
|
|
|
* given ResultRelInfo
|
|
|
|
*/
|
|
|
|
LockTupleMode
|
|
|
|
ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
|
|
|
|
{
|
|
|
|
Bitmapset *keyCols;
|
|
|
|
Bitmapset *updatedCols;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compute lock mode to use. If columns that are part of the key have not
|
|
|
|
* been modified, then we can use a weaker lock, allowing for better
|
|
|
|
* concurrency.
|
|
|
|
*/
|
|
|
|
updatedCols = GetUpdatedColumns(relinfo, estate);
|
|
|
|
keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
|
|
|
|
INDEX_ATTR_BITMAP_KEY);
|
|
|
|
|
|
|
|
if (bms_overlap(keyCols, updatedCols))
|
|
|
|
return LockTupleExclusive;
|
|
|
|
|
|
|
|
return LockTupleNoKeyExclusive;
|
|
|
|
}
|
|
|
|
|
2011-01-13 02:47:02 +01:00
|
|
|
/*
|
|
|
|
* ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
*
|
|
|
|
* If no such struct, either return NULL or throw error depending on missing_ok
|
2011-01-13 02:47:02 +01:00
|
|
|
*/
|
|
|
|
ExecRowMark *
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
ExecFindRowMark(EState *estate, Index rti, bool missing_ok)
|
2011-01-13 02:47:02 +01:00
|
|
|
{
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
foreach(lc, estate->es_rowMarks)
|
|
|
|
{
|
|
|
|
ExecRowMark *erm = (ExecRowMark *) lfirst(lc);
|
|
|
|
|
|
|
|
if (erm->rti == rti)
|
|
|
|
return erm;
|
|
|
|
}
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
if (!missing_ok)
|
|
|
|
elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
|
|
|
|
return NULL;
|
2011-01-13 02:47:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ExecBuildAuxRowMark -- create an ExecAuxRowMark struct
|
|
|
|
*
|
|
|
|
* Inputs are the underlying ExecRowMark struct and the targetlist of the
|
|
|
|
* input plan node (not planstate node!). We need the latter to find out
|
|
|
|
* the column numbers of the resjunk columns.
|
|
|
|
*/
|
|
|
|
ExecAuxRowMark *
|
|
|
|
ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
|
|
|
|
{
|
|
|
|
ExecAuxRowMark *aerm = (ExecAuxRowMark *) palloc0(sizeof(ExecAuxRowMark));
|
|
|
|
char resname[32];
|
|
|
|
|
|
|
|
aerm->rowmark = erm;
|
|
|
|
|
|
|
|
/* Look up the resjunk columns associated with this rowmark */
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
if (erm->markType != ROW_MARK_COPY)
|
2011-01-13 02:47:02 +01:00
|
|
|
{
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
/* need ctid for all methods other than COPY */
|
2011-02-10 05:27:07 +01:00
|
|
|
snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
|
2011-01-13 02:47:02 +01:00
|
|
|
aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
|
|
|
|
resname);
|
2011-02-10 05:27:07 +01:00
|
|
|
if (!AttributeNumberIsValid(aerm->ctidAttNo))
|
|
|
|
elog(ERROR, "could not find junk %s column", resname);
|
2011-01-13 02:47:02 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
/* need wholerow if COPY */
|
2011-02-10 05:27:07 +01:00
|
|
|
snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId);
|
2011-01-13 02:47:02 +01:00
|
|
|
aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist,
|
|
|
|
resname);
|
2011-02-10 05:27:07 +01:00
|
|
|
if (!AttributeNumberIsValid(aerm->wholeAttNo))
|
|
|
|
elog(ERROR, "could not find junk %s column", resname);
|
2011-01-13 02:47:02 +01:00
|
|
|
}
|
|
|
|
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
/* if child rel, need tableoid */
|
|
|
|
if (erm->rti != erm->prti)
|
|
|
|
{
|
|
|
|
snprintf(resname, sizeof(resname), "tableoid%u", erm->rowmarkId);
|
|
|
|
aerm->toidAttNo = ExecFindJunkAttributeInTlist(targetlist,
|
|
|
|
resname);
|
|
|
|
if (!AttributeNumberIsValid(aerm->toidAttNo))
|
|
|
|
elog(ERROR, "could not find junk %s column", resname);
|
|
|
|
}
|
|
|
|
|
2011-01-13 02:47:02 +01:00
|
|
|
return aerm;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2001-05-15 02:33:36 +02:00
|
|
|
/*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
|
|
|
|
* process the updated version under READ COMMITTED rules.
|
2001-05-15 02:33:36 +02:00
|
|
|
*
|
|
|
|
* See backend/executor/README for some info about how this works.
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check a modified tuple to see if we want to process its updated version
|
|
|
|
* under READ COMMITTED rules.
|
2005-08-20 02:40:32 +02:00
|
|
|
*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* estate - outer executor state data
|
|
|
|
* epqstate - state for EvalPlanQual rechecking
|
|
|
|
* relation - table containing tuple
|
2005-08-20 02:40:32 +02:00
|
|
|
* rti - rangetable index of table containing tuple
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
* lockmode - requested tuple lock mode
|
2005-08-20 02:40:32 +02:00
|
|
|
* *tid - t_ctid from the outdated tuple (ie, next updated version)
|
|
|
|
* priorXmax - t_xmax from the outdated tuple
|
|
|
|
*
|
|
|
|
* *tid is also an output parameter: it's modified to hold the TID of the
|
|
|
|
* latest version of the tuple (note this may be changed even on failure)
|
|
|
|
*
|
|
|
|
* Returns a slot containing the new candidate update/delete tuple, or
|
|
|
|
* NULL if we determine we shouldn't process the row.
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
*
|
|
|
|
* Note: properly, lockmode should be declared as enum LockTupleMode,
|
|
|
|
* but we use "int" to avoid having to include heapam.h in executor.h.
|
2001-05-15 02:33:36 +02:00
|
|
|
*/
|
1999-05-25 18:15:34 +02:00
|
|
|
TupleTableSlot *
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
EvalPlanQual(EState *estate, EPQState *epqstate,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
Relation relation, Index rti, int lockmode,
|
2007-11-30 22:22:54 +01:00
|
|
|
ItemPointer tid, TransactionId priorXmax)
|
1999-01-29 10:23:17 +01:00
|
|
|
{
|
2009-10-12 20:10:51 +02:00
|
|
|
TupleTableSlot *slot;
|
|
|
|
HeapTuple copyTuple;
|
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
Assert(rti > 0);
|
2009-10-12 20:10:51 +02:00
|
|
|
|
|
|
|
/*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* Get and lock the updated version of the row; if fail, return NULL.
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
2014-10-07 22:23:34 +02:00
|
|
|
copyTuple = EvalPlanQualFetch(estate, relation, lockmode, LockWaitBlock,
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
tid, priorXmax);
|
2009-10-12 20:10:51 +02:00
|
|
|
|
|
|
|
if (copyTuple == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For UPDATE/DELETE we have to return tid of actual row we're executing
|
|
|
|
* PQ for.
|
|
|
|
*/
|
|
|
|
*tid = copyTuple->t_self;
|
|
|
|
|
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Need to run a recheck subquery. Initialize or reinitialize EPQ state.
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
EvalPlanQualBegin(epqstate, estate);
|
2009-10-12 20:10:51 +02:00
|
|
|
|
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* Free old test tuple, if any, and store new tuple where relation's scan
|
|
|
|
* node will see it
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
EvalPlanQualSetTuple(epqstate, rti, copyTuple);
|
2009-10-12 20:10:51 +02:00
|
|
|
|
|
|
|
/*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* Fetch any non-locked source rows
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
EvalPlanQualFetchRowMarks(epqstate);
|
2009-10-12 20:10:51 +02:00
|
|
|
|
|
|
|
/*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* Run the EPQ query. We assume it will return at most one tuple.
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
slot = EvalPlanQualNext(epqstate);
|
2009-10-12 20:10:51 +02:00
|
|
|
|
2009-12-11 19:14:43 +01:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* If we got a tuple, force the slot to materialize the tuple so that it
|
|
|
|
* is not dependent on any local state in the EPQ query (in particular,
|
2009-12-11 19:14:43 +01:00
|
|
|
* it's highly likely that the slot contains references to any pass-by-ref
|
2010-02-26 03:01:40 +01:00
|
|
|
* datums that may be present in copyTuple). As with the next step, this
|
|
|
|
* is to guard against early re-use of the EPQ query.
|
2009-12-11 19:14:43 +01:00
|
|
|
*/
|
|
|
|
if (!TupIsNull(slot))
|
|
|
|
(void) ExecMaterializeSlot(slot);
|
|
|
|
|
2009-10-12 20:10:51 +02:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* Clear out the test tuple. This is needed in case the EPQ query is
|
|
|
|
* re-used to test a tuple for a different relation. (Not clear that can
|
|
|
|
* really happen, but let's be safe.)
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
EvalPlanQualSetTuple(epqstate, rti, NULL);
|
2009-10-12 20:10:51 +02:00
|
|
|
|
|
|
|
return slot;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fetch a copy of the newest version of an outdated tuple
|
|
|
|
*
|
|
|
|
* estate - executor state data
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* relation - table containing tuple
|
|
|
|
* lockmode - requested tuple lock mode
|
2014-10-07 22:23:34 +02:00
|
|
|
* wait_policy - requested lock wait policy
|
2009-10-12 20:10:51 +02:00
|
|
|
* *tid - t_ctid from the outdated tuple (ie, next updated version)
|
|
|
|
* priorXmax - t_xmax from the outdated tuple
|
|
|
|
*
|
|
|
|
* Returns a palloc'd copy of the newest tuple version, or NULL if we find
|
|
|
|
* that there is no newest version (ie, the row was deleted not updated).
|
2014-10-07 22:23:34 +02:00
|
|
|
* We also return NULL if the tuple is locked and the wait policy is to skip
|
|
|
|
* such tuples.
|
|
|
|
*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* If successful, we have locked the newest tuple version, so caller does not
|
|
|
|
* need to worry about it changing anymore.
|
2009-10-12 20:10:51 +02:00
|
|
|
*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* Note: properly, lockmode should be declared as enum LockTupleMode,
|
|
|
|
* but we use "int" to avoid having to include heapam.h in executor.h.
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
|
|
|
HeapTuple
|
2014-10-07 22:23:34 +02:00
|
|
|
EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
|
|
|
|
LockWaitPolicy wait_policy,
|
2009-10-12 20:10:51 +02:00
|
|
|
ItemPointer tid, TransactionId priorXmax)
|
|
|
|
{
|
|
|
|
HeapTuple copyTuple = NULL;
|
1999-05-25 18:15:34 +02:00
|
|
|
HeapTupleData tuple;
|
2007-03-25 21:45:14 +02:00
|
|
|
SnapshotData SnapshotDirty;
|
1999-01-29 10:23:17 +01:00
|
|
|
|
2001-05-15 02:33:36 +02:00
|
|
|
/*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* fetch target tuple
|
2001-05-15 02:33:36 +02:00
|
|
|
*
|
|
|
|
* Loop here to deal with updated or busy tuples
|
|
|
|
*/
|
2007-03-25 21:45:14 +02:00
|
|
|
InitDirtySnapshot(SnapshotDirty);
|
2001-05-15 02:33:36 +02:00
|
|
|
tuple.t_self = *tid;
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
Buffer buffer;
|
|
|
|
|
2007-03-25 21:45:14 +02:00
|
|
|
if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
|
2001-05-15 02:33:36 +02:00
|
|
|
{
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
HTSU_Result test;
|
2012-10-26 21:55:36 +02:00
|
|
|
HeapUpdateFailureData hufd;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
2005-08-20 02:40:32 +02:00
|
|
|
/*
|
|
|
|
* If xmin isn't what we're expecting, the slot must have been
|
2014-05-06 18:12:18 +02:00
|
|
|
* recycled and reused for an unrelated tuple. This implies that
|
2005-10-15 04:49:52 +02:00
|
|
|
* the latest version of the row was deleted, so we need do
|
|
|
|
* nothing. (Should be safe to examine xmin without getting
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows specifying an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using an
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
* buffer's content lock. We assume reading a TransactionId to be
|
|
|
|
* atomic, and Xmin never changes in an existing tuple, except to
|
|
|
|
* invalid or frozen, and neither of those can match priorXmax.)
|
2005-08-20 02:40:32 +02:00
|
|
|
*/
|
2017-11-02 15:51:05 +01:00
|
|
|
if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
|
|
|
|
priorXmax))
|
2005-08-20 02:40:32 +02:00
|
|
|
{
|
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
return NULL;
|
|
|
|
}
|
2001-05-15 02:33:36 +02:00
|
|
|
|
2005-08-20 02:40:32 +02:00
|
|
|
/* otherwise xmin should not be dirty... */
|
2007-03-25 21:45:14 +02:00
|
|
|
if (TransactionIdIsValid(SnapshotDirty.xmin))
|
2003-07-21 19:05:12 +02:00
|
|
|
elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
|
2001-05-15 02:33:36 +02:00
|
|
|
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* If tuple is being updated by other transaction then we have to
|
2014-08-28 01:15:18 +02:00
|
|
|
* wait for its commit/abort, or die trying.
|
2001-05-15 02:33:36 +02:00
|
|
|
*/
|
2007-03-25 21:45:14 +02:00
|
|
|
if (TransactionIdIsValid(SnapshotDirty.xmax))
|
2001-05-15 02:33:36 +02:00
|
|
|
{
|
|
|
|
ReleaseBuffer(buffer);
|
2014-10-07 22:23:34 +02:00
|
|
|
switch (wait_policy)
|
2014-08-28 01:15:18 +02:00
|
|
|
{
|
2014-10-07 22:23:34 +02:00
|
|
|
case LockWaitBlock:
|
|
|
|
XactLockTableWait(SnapshotDirty.xmax,
|
2015-02-04 15:00:34 +01:00
|
|
|
relation, &tuple.t_self,
|
2014-10-07 22:23:34 +02:00
|
|
|
XLTW_FetchUpdated);
|
|
|
|
break;
|
|
|
|
case LockWaitSkip:
|
|
|
|
if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
return NULL; /* skip instead of waiting */
|
2014-10-07 22:23:34 +02:00
|
|
|
break;
|
|
|
|
case LockWaitError:
|
|
|
|
if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
|
|
|
|
errmsg("could not obtain lock on row in relation \"%s\"",
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
RelationGetRelationName(relation))));
|
2014-10-07 22:23:34 +02:00
|
|
|
break;
|
2014-08-28 01:15:18 +02:00
|
|
|
}
|
2005-08-20 02:40:32 +02:00
|
|
|
continue; /* loop back to repeat heap_fetch */
|
2001-05-15 02:33:36 +02:00
|
|
|
}
|
|
|
|
|
2006-01-12 22:48:53 +01:00
|
|
|
/*
|
|
|
|
* If tuple was inserted by our own transaction, we have to check
|
2007-11-30 22:22:54 +01:00
|
|
|
* cmin against es_output_cid: cmin >= current CID means our
|
2013-05-29 22:58:43 +02:00
|
|
|
* command cannot see the tuple, so we should ignore it. Otherwise
|
|
|
|
* heap_lock_tuple() will throw an error, and so would any later
|
|
|
|
* attempt to update or delete the tuple. (We need not check cmax
|
|
|
|
* because HeapTupleSatisfiesDirty will consider a tuple deleted
|
2015-01-09 17:01:31 +01:00
|
|
|
* by our transaction dead, regardless of cmax.) We just checked
|
2013-05-29 22:58:43 +02:00
|
|
|
* that priorXmax == xmin, so we can test that variable instead of
|
|
|
|
* doing HeapTupleHeaderGetXmin again.
|
2006-01-12 22:48:53 +01:00
|
|
|
*/
|
|
|
|
if (TransactionIdIsCurrentTransactionId(priorXmax) &&
|
2007-11-30 22:22:54 +01:00
|
|
|
HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
|
2006-01-12 22:48:53 +01:00
|
|
|
{
|
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/*
|
|
|
|
* This is a live tuple, so now try to lock it.
|
|
|
|
*/
|
2012-10-26 21:55:36 +02:00
|
|
|
test = heap_lock_tuple(relation, &tuple,
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
estate->es_output_cid,
|
2014-10-07 22:23:34 +02:00
|
|
|
lockmode, wait_policy,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
false, &buffer, &hufd);
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/* We now have two pins on the buffer, get rid of one */
|
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
|
|
|
|
switch (test)
|
|
|
|
{
|
|
|
|
case HeapTupleSelfUpdated:
|
2013-05-29 22:58:43 +02:00
|
|
|
|
2012-10-26 21:55:36 +02:00
|
|
|
/*
|
|
|
|
* The target tuple was already updated or deleted by the
|
|
|
|
* current command, or by a later command in the current
|
|
|
|
* transaction. We *must* ignore the tuple in the former
|
|
|
|
* case, so as to avoid the "Halloween problem" of
|
|
|
|
* repeated update attempts. In the latter case it might
|
|
|
|
* be sensible to fetch the updated tuple instead, but
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows specifying an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using an
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
* doing so would require changing heap_update and
|
|
|
|
* heap_delete to not complain about updating "invisible"
|
|
|
|
* tuples, which seems pretty scary (heap_lock_tuple will
|
2015-05-24 03:35:49 +02:00
|
|
|
* not complain, but few callers expect
|
|
|
|
* HeapTupleInvisible, and we're not one of them). So for
|
|
|
|
* now, treat the tuple as deleted and do not process.
|
2012-10-26 21:55:36 +02:00
|
|
|
*/
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
case HeapTupleMayBeUpdated:
|
|
|
|
/* successfully locked */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case HeapTupleUpdated:
|
|
|
|
ReleaseBuffer(buffer);
|
2010-09-11 20:38:58 +02:00
|
|
|
if (IsolationUsesXactSnapshot())
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
|
|
|
|
errmsg("could not serialize access due to concurrent update")));
|
Raise error when affecting tuple moved into different partition.
When an update moves a row between partitions (supported since
2f178441044b), our normal logic for following update chains in READ
COMMITTED mode doesn't work anymore. Cross partition updates are
modeled as a delete from the old and insert into the new
partition. No ctid chain exists across partitions, and there's no
convenient space to introduce that link.
Not throwing an error in a partitioned context when one would have
been thrown without partitioning is obviously problematic. This commit
introduces infrastructure to detect when a tuple has been moved, not
just plainly deleted. That allows to throw an error when encountering
a deletion that's actually a move, while attempting to following a
ctid chain.
The row deleted as part of a cross partition update is marked by
pointing its t_ctid to an invalid block, instead of self as a normal
update would. That was deemed to be the least invasive and most
future proof way to represent the knowledge, given how few infomask
bits are there to be recycled (there's also some locking issues with
using infomask bits).
External code following ctid chains should be updated to check for
moved tuples. The most likely consequence of not doing so is a missed
error.
Author: Amul Sul, editorialized by me
Reviewed-By: Amit Kapila, Pavan Deolasee, Andres Freund, Robert Haas
Discussion: http://postgr.es/m/CAAJ_b95PkwojoYfz0bzXU8OokcTVGzN6vYGCNVUukeUDrnF3dw@mail.gmail.com
2018-04-07 22:24:10 +02:00
|
|
|
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
|
|
|
|
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
|
|
|
|
/* Should not encounter speculative tuple on recheck */
|
|
|
|
Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
|
2012-10-26 21:55:36 +02:00
|
|
|
if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
{
|
|
|
|
/* it was updated, so look at the updated version */
|
2012-10-26 21:55:36 +02:00
|
|
|
tuple.t_self = hufd.ctid;
|
2010-01-08 03:44:00 +01:00
|
|
|
/* updated row should have xmin matching this xmax */
|
2012-10-26 21:55:36 +02:00
|
|
|
priorXmax = hufd.xmax;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/* tuple was deleted, so give up */
|
|
|
|
return NULL;
|
|
|
|
|
2014-10-07 22:23:34 +02:00
|
|
|
case HeapTupleWouldBlock:
|
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
return NULL;
|
|
|
|
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
case HeapTupleInvisible:
|
|
|
|
elog(ERROR, "attempted to lock invisible tuple");
|
2018-05-02 01:35:08 +02:00
|
|
|
break;
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
default:
|
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
elog(ERROR, "unrecognized heap_lock_tuple status: %u",
|
|
|
|
test);
|
|
|
|
return NULL; /* keep compiler quiet */
|
|
|
|
}
|
|
|
|
|
2001-05-15 02:33:36 +02:00
|
|
|
/*
|
|
|
|
* We got tuple - now copy it for use by recheck query.
|
|
|
|
*/
|
|
|
|
copyTuple = heap_copytuple(&tuple);
|
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* If the referenced slot was actually empty, the latest version of
|
|
|
|
* the row must have been deleted, so we need do nothing.
|
2001-05-15 02:33:36 +02:00
|
|
|
*/
|
2005-08-20 02:40:32 +02:00
|
|
|
if (tuple.t_data == NULL)
|
2001-05-15 02:33:36 +02:00
|
|
|
{
|
2005-08-20 02:40:32 +02:00
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
return NULL;
|
2001-05-15 02:33:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2005-08-20 02:40:32 +02:00
|
|
|
* As above, if xmin isn't what we're expecting, do nothing.
|
2001-05-15 02:33:36 +02:00
|
|
|
*/
|
2017-11-02 15:51:05 +01:00
|
|
|
if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
|
|
|
|
priorXmax))
|
2005-08-20 02:40:32 +02:00
|
|
|
{
|
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we get here, the tuple was found but failed SnapshotDirty.
|
2005-10-15 04:49:52 +02:00
|
|
|
* Assuming the xmin is either a committed xact or our own xact (as it
|
|
|
|
* certainly should be if we're trying to modify the tuple), this must
|
|
|
|
* mean that the row was updated or deleted by either a committed xact
|
|
|
|
* or our own xact. If it was deleted, we can ignore it; if it was
|
|
|
|
* updated then chain up to the next version and repeat the whole
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* process.
|
2005-08-20 02:40:32 +02:00
|
|
|
*
|
2005-10-15 04:49:52 +02:00
|
|
|
* As above, it should be safe to examine xmax and t_ctid without the
|
|
|
|
* buffer content lock, because they can't be changing.
|
2005-08-20 02:40:32 +02:00
|
|
|
*/
|
Raise error when affecting tuple moved into different partition.
When an update moves a row between partitions (supported since
2f178441044b), our normal logic for following update chains in READ
COMMITTED mode doesn't work anymore. Cross partition updates are
modeled as an delete from the old and insert into the new
partition. No ctid chain exists across partitions, and there's no
convenient space to introduce that link.
Not throwing an error in a partitioned context when one would have
been thrown without partitioning is obviously problematic. This commit
introduces infrastructure to detect when a tuple has been moved, not
just plainly deleted. That allows to throw an error when encountering
a deletion that's actually a move, while attempting to following a
ctid chain.
The row deleted as part of a cross partition update is marked by
pointing it's t_ctid to an invalid block, instead of self as a normal
update would. That was deemed to be the least invasive and most
future proof way to represent the knowledge, given how few infomask
bits are there to be recycled (there's also some locking issues with
using infomask bits).
External code following ctid chains should be updated to check for
moved tuples. The most likely consequence of not doing so is a missed
error.
Author: Amul Sul, editorialized by me
Reviewed-By: Amit Kapila, Pavan Deolasee, Andres Freund, Robert Haas
Discussion: http://postgr.es/m/CAAJ_b95PkwojoYfz0bzXU8OokcTVGzN6vYGCNVUukeUDrnF3dw@mail.gmail.com
2018-04-07 22:24:10 +02:00
|
|
|
|
|
|
|
/* check whether next version would be in a different partition */
|
|
|
|
if (HeapTupleHeaderIndicatesMovedPartitions(tuple.t_data))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
|
|
|
|
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
|
|
|
|
|
|
|
|
/* check whether tuple has been deleted */
|
2005-08-20 02:40:32 +02:00
|
|
|
if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
|
|
|
|
{
|
|
|
|
/* deleted, so forget about it */
|
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* updated, so look at the updated row */
|
|
|
|
tuple.t_self = tuple.t_data->t_ctid;
|
|
|
|
/* updated row should have xmin matching this xmax */
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data);
|
2005-08-20 02:40:32 +02:00
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
/* loop back to fetch next in chain */
|
2001-05-15 02:33:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-10-12 20:10:51 +02:00
|
|
|
* Return the copied tuple
|
2001-05-15 02:33:36 +02:00
|
|
|
*/
|
2009-10-12 20:10:51 +02:00
|
|
|
return copyTuple;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* EvalPlanQualInit -- initialize during creation of a plan state node
|
|
|
|
* that might need to invoke EPQ processing.
|
2011-01-13 02:47:02 +01:00
|
|
|
*
|
|
|
|
* Note: subplan/auxrowmarks can be NULL/NIL if they will be set later
|
|
|
|
* with EvalPlanQualSetPlan.
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
|
|
|
void
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
EvalPlanQualInit(EPQState *epqstate, EState *estate,
|
2011-01-13 02:47:02 +01:00
|
|
|
Plan *subplan, List *auxrowmarks, int epqParam)
|
2009-10-12 20:10:51 +02:00
|
|
|
{
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/* Mark the EPQ state inactive */
|
|
|
|
epqstate->estate = NULL;
|
|
|
|
epqstate->planstate = NULL;
|
|
|
|
epqstate->origslot = NULL;
|
|
|
|
/* ... and remember data that EvalPlanQualBegin will need */
|
|
|
|
epqstate->plan = subplan;
|
2011-01-13 02:47:02 +01:00
|
|
|
epqstate->arowMarks = auxrowmarks;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
epqstate->epqParam = epqParam;
|
|
|
|
}
|
2009-10-12 20:10:51 +02:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/*
|
|
|
|
* EvalPlanQualSetPlan -- set or change subplan of an EPQState.
|
|
|
|
*
|
2015-01-19 17:36:22 +01:00
|
|
|
* We need this so that ModifyTable can deal with multiple subplans.
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
*/
|
|
|
|
void
|
2011-01-13 02:47:02 +01:00
|
|
|
EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks)
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
{
|
|
|
|
/* If we have a live EPQ query, shut it down */
|
|
|
|
EvalPlanQualEnd(epqstate);
|
|
|
|
/* And set/change the plan pointer */
|
|
|
|
epqstate->plan = subplan;
|
2011-01-13 02:47:02 +01:00
|
|
|
/* The rowmarks depend on the plan, too */
|
|
|
|
epqstate->arowMarks = auxrowmarks;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
}
|
2001-05-15 02:33:36 +02:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/*
|
|
|
|
* Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
|
|
|
|
*
|
|
|
|
* NB: passed tuple must be palloc'd; it may get freed later
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
|
|
|
|
{
|
|
|
|
EState *estate = epqstate->estate;
|
1999-01-29 10:23:17 +01:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
Assert(rti > 0);
|
1999-01-29 10:23:17 +01:00
|
|
|
|
1999-05-25 18:15:34 +02:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* free old test tuple, if any, and store new tuple where relation's scan
|
|
|
|
* node will see it
|
1999-01-29 10:23:17 +01:00
|
|
|
*/
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
if (estate->es_epqTuple[rti - 1] != NULL)
|
|
|
|
heap_freetuple(estate->es_epqTuple[rti - 1]);
|
|
|
|
estate->es_epqTuple[rti - 1] = tuple;
|
|
|
|
estate->es_epqTupleSet[rti - 1] = true;
|
|
|
|
}
|
1999-01-29 10:23:17 +01:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/*
|
|
|
|
* Fetch back the current test tuple (if any) for the specified RTI
|
|
|
|
*/
|
|
|
|
HeapTuple
|
|
|
|
EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
|
|
|
|
{
|
|
|
|
EState *estate = epqstate->estate;
|
1999-01-29 10:23:17 +01:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
Assert(rti > 0);
|
1999-01-29 10:23:17 +01:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
return estate->es_epqTuple[rti - 1];
|
2009-10-12 20:10:51 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* Fetch the current row values for any non-locked relations that need
|
2014-05-06 18:12:18 +02:00
|
|
|
* to be scanned by an EvalPlanQual operation. origslot must have been set
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* to contain the current result row (top-level row) that we need to recheck.
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
|
|
|
void
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
EvalPlanQualFetchRowMarks(EPQState *epqstate)
|
2009-10-12 20:10:51 +02:00
|
|
|
{
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
ListCell *l;
|
2009-10-12 20:10:51 +02:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
Assert(epqstate->origslot != NULL);
|
2002-12-18 01:14:47 +01:00
|
|
|
|
2011-01-13 02:47:02 +01:00
|
|
|
foreach(l, epqstate->arowMarks)
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
{
|
2011-01-13 02:47:02 +01:00
|
|
|
ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(l);
|
|
|
|
ExecRowMark *erm = aerm->rowmark;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
Datum datum;
|
|
|
|
bool isNull;
|
|
|
|
HeapTupleData tuple;
|
|
|
|
|
2011-01-13 02:47:02 +01:00
|
|
|
if (RowMarkRequiresRowShareLock(erm->markType))
|
|
|
|
elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");
|
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/* clear any leftover test tuple for this rel */
|
|
|
|
EvalPlanQualSetTuple(epqstate, erm->rti, NULL);
|
|
|
|
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
/* if child rel, must check whether it produced this row */
|
|
|
|
if (erm->rti != erm->prti)
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
{
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
Oid tableoid;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
datum = ExecGetJunkAttribute(epqstate->origslot,
|
|
|
|
aerm->toidAttNo,
|
|
|
|
&isNull);
|
|
|
|
/* non-locked rels could be on the inside of outer joins */
|
|
|
|
if (isNull)
|
|
|
|
continue;
|
|
|
|
tableoid = DatumGetObjectId(datum);
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
Assert(OidIsValid(erm->relid));
|
|
|
|
if (tableoid != erm->relid)
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
{
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
/* this child is inactive right now */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
if (erm->markType == ROW_MARK_REFERENCE)
|
|
|
|
{
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
HeapTuple copyTuple;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
Assert(erm->relation != NULL);
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
|
|
|
/* fetch the tuple's ctid */
|
|
|
|
datum = ExecGetJunkAttribute(epqstate->origslot,
|
2011-01-13 02:47:02 +01:00
|
|
|
aerm->ctidAttNo,
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
&isNull);
|
|
|
|
/* non-locked rels could be on the inside of outer joins */
|
|
|
|
if (isNull)
|
|
|
|
continue;
|
|
|
|
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
/* fetch requests on foreign tables must be passed to their FDW */
|
|
|
|
if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
|
|
|
|
{
|
|
|
|
FdwRoutine *fdwroutine;
|
|
|
|
bool updated = false;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
|
|
|
|
/* this should have been checked already, but let's be safe */
|
|
|
|
if (fdwroutine->RefetchForeignRow == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errmsg("cannot lock rows in foreign table \"%s\"",
|
|
|
|
RelationGetRelationName(erm->relation))));
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate,
|
|
|
|
erm,
|
|
|
|
datum,
|
|
|
|
&updated);
|
|
|
|
if (copyTuple == NULL)
|
|
|
|
elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ideally we'd insist on updated == false here, but that
|
|
|
|
* assumes that FDWs can track that exactly, which they might
|
|
|
|
* not be able to. So just ignore the flag.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* ordinary table, fetch the tuple */
|
|
|
|
Buffer buffer;
|
|
|
|
|
|
|
|
tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
|
|
|
|
if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
|
|
|
|
false, NULL))
|
|
|
|
elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
|
|
|
|
|
2018-08-10 22:05:54 +02:00
|
|
|
/* successful, copy tuple */
|
|
|
|
copyTuple = heap_copytuple(&tuple);
|
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
2015-05-12 20:10:10 +02:00
|
|
|
ReleaseBuffer(buffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* store tuple */
|
|
|
|
EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
HeapTupleHeader td;
|
|
|
|
|
|
|
|
Assert(erm->markType == ROW_MARK_COPY);
|
|
|
|
|
|
|
|
/* fetch the whole-row Var for the relation */
|
|
|
|
datum = ExecGetJunkAttribute(epqstate->origslot,
|
2011-01-13 02:47:02 +01:00
|
|
|
aerm->wholeAttNo,
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
&isNull);
|
|
|
|
/* non-locked rels could be on the inside of outer joins */
|
|
|
|
if (isNull)
|
|
|
|
continue;
|
|
|
|
td = DatumGetHeapTupleHeader(datum);
|
|
|
|
|
|
|
|
/* build a temporary HeapTuple control structure */
|
|
|
|
tuple.t_len = HeapTupleHeaderGetDatumLength(td);
|
Fix postgres_fdw to return the right ctid value in EvalPlanQual cases.
If a postgres_fdw foreign table is a non-locked source relation in an
UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, and the query selects its
ctid column, the wrong value would be returned if an EvalPlanQual
recheck occurred. This happened because the foreign table's result row
was copied via the ROW_MARK_COPY code path, and EvalPlanQualFetchRowMarks
just unconditionally set the reconstructed tuple's t_self to "invalid".
To fix that, we can have EvalPlanQualFetchRowMarks copy the composite
datum's t_ctid field, and be sure to initialize that along with t_self
when postgres_fdw constructs a tuple to return.
If we just did that much then EvalPlanQualFetchRowMarks would start
returning "(0,0)" as ctid for all other ROW_MARK_COPY cases, which perhaps
does not matter much, but then again maybe it might. The cause of that is
that heap_form_tuple, which is the ultimate source of all composite datums,
simply leaves t_ctid as zeroes in newly constructed tuples. That seems
like a bad idea on general principles: a field that's really not been
initialized shouldn't appear to have a valid value. So let's eat the
trivial additional overhead of doing "ItemPointerSetInvalid(&(td->t_ctid))"
in heap_form_tuple.
This closes out our handling of Etsuro Fujita's report that tableoid and
ctid weren't correctly set in postgres_fdw EvalPlanQual cases. Along the
way we did a great deal of work to improve FDWs' ability to control row
locking behavior; which was not wasted effort by any means, but it didn't
end up being a fix for this problem because that feature would be too
expensive for postgres_fdw to use all the time.
Although the fix for the tableoid misbehavior was back-patched, I'm
hesitant to do so here; it seems far less likely that people would care
about remote ctid than tableoid, and even such a minor behavioral change
as this in heap_form_tuple is perhaps best not back-patched. So commit
to HEAD only, at least for the moment.
Etsuro Fujita, with some adjustments by me
2015-05-13 20:05:17 +02:00
|
|
|
tuple.t_data = td;
|
2015-03-12 18:38:49 +01:00
|
|
|
/* relation might be a foreign table, if so provide tableoid */
|
Allow foreign tables to participate in inheritance.
Foreign tables can now be inheritance children, or parents. Much of the
system was already ready for this, but we had to fix a few things of
course, mostly in the area of planner and executor handling of row locks.
As side effects of this, allow foreign tables to have NOT VALID CHECK
constraints (and hence to accept ALTER ... VALIDATE CONSTRAINT), and to
accept ALTER SET STORAGE and ALTER SET WITH/WITHOUT OIDS. Continuing to
disallow these things would've required bizarre and inconsistent special
cases in inheritance behavior. Since foreign tables don't enforce CHECK
constraints anyway, a NOT VALID one is a complete no-op, but that doesn't
mean we shouldn't allow it. And it's possible that some FDWs might have
use for SET STORAGE or SET WITH OIDS, though doubtless they will be no-ops
for most.
An additional change in support of this is that when a ModifyTable node
has multiple target tables, they will all now be explicitly identified
in EXPLAIN output, for example:
Update on pt1 (cost=0.00..321.05 rows=3541 width=46)
Update on pt1
Foreign Update on ft1
Foreign Update on ft2
Update on child3
-> Seq Scan on pt1 (cost=0.00..0.00 rows=1 width=46)
-> Foreign Scan on ft1 (cost=100.00..148.03 rows=1170 width=46)
-> Foreign Scan on ft2 (cost=100.00..148.03 rows=1170 width=46)
-> Seq Scan on child3 (cost=0.00..25.00 rows=1200 width=46)
This was done mainly to provide an unambiguous place to attach "Remote SQL"
fields, but it is useful for inherited updates even when no foreign tables
are involved.
Shigeru Hanada and Etsuro Fujita, reviewed by Ashutosh Bapat and Kyotaro
Horiguchi, some additional hacking by me
2015-03-22 18:53:11 +01:00
|
|
|
tuple.t_tableOid = erm->relid;
|
Fix postgres_fdw to return the right ctid value in EvalPlanQual cases.
If a postgres_fdw foreign table is a non-locked source relation in an
UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, and the query selects its
ctid column, the wrong value would be returned if an EvalPlanQual
recheck occurred. This happened because the foreign table's result row
was copied via the ROW_MARK_COPY code path, and EvalPlanQualFetchRowMarks
just unconditionally set the reconstructed tuple's t_self to "invalid".
To fix that, we can have EvalPlanQualFetchRowMarks copy the composite
datum's t_ctid field, and be sure to initialize that along with t_self
when postgres_fdw constructs a tuple to return.
If we just did that much then EvalPlanQualFetchRowMarks would start
returning "(0,0)" as ctid for all other ROW_MARK_COPY cases, which perhaps
does not matter much, but then again maybe it might. The cause of that is
that heap_form_tuple, which is the ultimate source of all composite datums,
simply leaves t_ctid as zeroes in newly constructed tuples. That seems
like a bad idea on general principles: a field that's really not been
initialized shouldn't appear to have a valid value. So let's eat the
trivial additional overhead of doing "ItemPointerSetInvalid(&(td->t_ctid))"
in heap_form_tuple.
This closes out our handling of Etsuro Fujita's report that tableoid and
ctid weren't correctly set in postgres_fdw EvalPlanQual cases. Along the
way we did a great deal of work to improve FDWs' ability to control row
locking behavior; which was not wasted effort by any means, but it didn't
end up being a fix for this problem because that feature would be too
expensive for postgres_fdw to use all the time.
Although the fix for the tableoid misbehavior was back-patched, I'm
hesitant to do so here; it seems far less likely that people would care
about remote ctid than tableoid, and even such a minor behavioral change
as this in heap_form_tuple is perhaps best not back-patched. So commit
to HEAD only, at least for the moment.
Etsuro Fujita, with some adjustments by me
2015-05-13 20:05:17 +02:00
|
|
|
/* also copy t_ctid in case there's valid data there */
|
|
|
|
tuple.t_self = td->t_ctid;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
|
|
|
/* copy and store tuple */
|
|
|
|
EvalPlanQualSetTuple(epqstate, erm->rti,
|
|
|
|
heap_copytuple(&tuple));
|
|
|
|
}
|
|
|
|
}
|
1999-01-29 10:23:17 +01:00
|
|
|
}
|
|
|
|
|
2009-10-12 20:10:51 +02:00
|
|
|
/*
|
|
|
|
* Fetch the next row (if any) from EvalPlanQual testing
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
*
|
|
|
|
* (In practice, there should never be more than one row...)
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
|
|
|
/*
 * EvalPlanQualNext
 *
 * Run the EPQ recheck plan one step and return the slot it produces.
 * The plan is executed inside the EPQ child EState's per-query memory
 * context, so any allocations made during the recheck belong to that
 * context rather than the caller's.
 */
TupleTableSlot *
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
EvalPlanQualNext(EPQState *epqstate)
|
1999-01-29 10:23:17 +01:00
|
|
|
{
|
2002-12-18 01:14:47 +01:00
|
|
|
MemoryContext oldcontext;
|
1999-05-25 18:15:34 +02:00
|
|
|
TupleTableSlot *slot;
|
1999-01-29 10:23:17 +01:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/* run the recheck plan inside the child EState's per-query context */
oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
|
|
|
|
slot = ExecProcNode(epqstate->planstate);
|
2002-12-18 01:14:47 +01:00
|
|
|
/* restore the caller's memory context before returning */
MemoryContextSwitchTo(oldcontext);
|
1999-01-29 10:23:17 +01:00
|
|
|
|
2009-10-12 20:10:51 +02:00
|
|
|
return slot;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* Initialize or reset an EvalPlanQual state tree
|
2009-10-12 20:10:51 +02:00
|
|
|
*/
|
|
|
|
/*
 * EvalPlanQualBegin
 *
 * Set up the EPQ state for a recheck: build the child EState on first
 * use, or reset the already-built child plan tree so it can be rescanned
 * for another test row.
 */
void
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
EvalPlanQualBegin(EPQState *epqstate, EState *parentestate)
|
2009-10-12 20:10:51 +02:00
|
|
|
{
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/* NULL until the first call builds the child EState below */
EState *estate = epqstate->estate;
|
2009-10-12 20:10:51 +02:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
if (estate == NULL)
|
1999-01-29 10:23:17 +01:00
|
|
|
{
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/* First time through, so create a child EState */
|
|
|
|
EvalPlanQualStart(epqstate, parentestate, epqstate->plan);
|
1999-01-29 10:23:17 +01:00
|
|
|
}
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
else
|
2000-04-07 09:24:47 +02:00
|
|
|
{
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/*
|
|
|
|
* We already have a suitable child EPQ tree, so just reset it.
|
|
|
|
*/
|
|
|
|
int rtsize = list_length(parentestate->es_range_table);
|
|
|
|
PlanState *planstate = epqstate->planstate;
|
2000-04-07 02:59:17 +02:00
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/* zero the per-range-table-entry EPQ scan-done flags for the new row */
MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool));
|
|
|
|
|
|
|
|
/* Recopy current values of parent parameters */
|
2017-11-13 21:24:12 +01:00
|
|
|
if (parentestate->es_plannedstmt->paramExecTypes != NIL)
|
2000-04-07 02:59:17 +02:00
|
|
|
{
|
2017-11-13 21:24:12 +01:00
|
|
|
int i;
|
|
|
|
|
|
|
|
/* number of exec Params to copy down from the parent estate */
i = list_length(parentestate->es_plannedstmt->paramExecTypes);
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
|
|
|
while (--i >= 0)
|
|
|
|
{
|
|
|
|
/* copy value if any, but not execPlan link */
|
|
|
|
estate->es_param_exec_vals[i].value =
|
|
|
|
parentestate->es_param_exec_vals[i].value;
|
|
|
|
estate->es_param_exec_vals[i].isnull =
|
|
|
|
parentestate->es_param_exec_vals[i].isnull;
|
|
|
|
}
|
2000-04-07 02:59:17 +02:00
|
|
|
}
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Mark child plan tree as needing rescan at all scan nodes. The
|
|
|
|
* first ExecProcNode will take care of actually doing the rescan.
|
|
|
|
*/
|
|
|
|
planstate->chgParam = bms_add_member(planstate->chgParam,
|
|
|
|
epqstate->epqParam);
|
2002-12-18 01:14:47 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
* Start execution of an EvalPlanQual plan tree.
|
2002-12-18 01:14:47 +01:00
|
|
|
*
|
|
|
|
* This is a cut-down version of ExecutorStart(): we copy some state from
|
|
|
|
* the top-level estate rather than initializing it fresh.
|
|
|
|
*/
|
|
|
|
static void
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/*
 * EvalPlanQualStart -- initialize per-recheck executor state
 *
 * Build a child EState (stored in epqstate->estate) and an initialized plan
 * tree (stored in epqstate->planstate) for running an EvalPlanQual recheck
 * of planTree.  Unchanging state (snapshot, range table, planned statement,
 * external params, row marks) is shared with parentestate; mutable local
 * state (tuple table, es_param_exec_vals, es_epqScanDone) gets fresh copies
 * so the recheck cannot clobber the parent's execution state.
 */
EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
{
	EState	   *estate;
	int			rtsize;			/* number of RTEs in the parent's range table */
	MemoryContext oldcontext;
	ListCell   *l;

	rtsize = list_length(parentestate->es_range_table);

	/* Create the child executor state and do all allocations in its context */
	epqstate->estate = estate = CreateExecutorState();

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Child EPQ EStates share the parent's copy of unchanging state such as
	 * the snapshot, rangetable, result-rel info, and external Param info.
	 * They need their own copies of local state, including a tuple table,
	 * es_param_exec_vals, etc.
	 *
	 * The ResultRelInfo array management is trickier than it looks.  We
	 * create a fresh array for the child but copy all the content from the
	 * parent.  This is because it's okay for the child to share any
	 * per-relation state the parent has already created --- but if the child
	 * sets up any ResultRelInfo fields, such as its own junkfilter, that
	 * state must *not* propagate back to the parent.  (For one thing, the
	 * pointed-to data is in a memory context that won't last long enough.)
	 */
	estate->es_direction = ForwardScanDirection;
	estate->es_snapshot = parentestate->es_snapshot;
	estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
	estate->es_range_table = parentestate->es_range_table;
	estate->es_plannedstmt = parentestate->es_plannedstmt;
	estate->es_junkFilter = parentestate->es_junkFilter;
	estate->es_output_cid = parentestate->es_output_cid;
	if (parentestate->es_num_result_relations > 0)
	{
		int			numResultRelations = parentestate->es_num_result_relations;
		ResultRelInfo *resultRelInfos;

		/* flat-copy the parent's ResultRelInfo array (see comment above) */
		resultRelInfos = (ResultRelInfo *)
			palloc(numResultRelations * sizeof(ResultRelInfo));
		memcpy(resultRelInfos, parentestate->es_result_relations,
			   numResultRelations * sizeof(ResultRelInfo));
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
	}
	/* es_result_relation_info must NOT be copied */
	/* es_trig_target_relations must NOT be copied */
	estate->es_rowMarks = parentestate->es_rowMarks;
	estate->es_top_eflags = parentestate->es_top_eflags;
	estate->es_instrument = parentestate->es_instrument;
	/* es_auxmodifytables must NOT be copied */

	/*
	 * The external param list is simply shared from parent.  The internal
	 * param workspace has to be local state, but we copy the initial values
	 * from the parent, so as to have access to any param values that were
	 * already set from other parts of the parent's plan tree.
	 */
	estate->es_param_list_info = parentestate->es_param_list_info;
	if (parentestate->es_plannedstmt->paramExecTypes != NIL)
	{
		int			i;

		i = list_length(parentestate->es_plannedstmt->paramExecTypes);
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(i * sizeof(ParamExecData));
		while (--i >= 0)
		{
			/* copy value if any, but not execPlan link */
			estate->es_param_exec_vals[i].value =
				parentestate->es_param_exec_vals[i].value;
			estate->es_param_exec_vals[i].isnull =
				parentestate->es_param_exec_vals[i].isnull;
		}
	}

	/*
	 * Each EState must have its own es_epqScanDone state, but if we have
	 * nested EPQ checks they should share es_epqTuple arrays.  This allows
	 * sub-rechecks to inherit the values being examined by an outer recheck.
	 */
	estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
	if (parentestate->es_epqTuple != NULL)
	{
		/* nested EPQ: share the outer recheck's test-tuple arrays */
		estate->es_epqTuple = parentestate->es_epqTuple;
		estate->es_epqTupleSet = parentestate->es_epqTupleSet;
	}
	else
	{
		/* outermost EPQ: allocate fresh (zeroed) per-RTE tuple arrays */
		estate->es_epqTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
		estate->es_epqTupleSet = (bool *)
			palloc0(rtsize * sizeof(bool));
	}

	/*
	 * Each estate also has its own tuple table.
	 */
	estate->es_tupleTable = NIL;

	/*
	 * Initialize private state information for each SubPlan.  We must do
	 * this before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries.  Some of the
	 * SubPlans might not be used in the part of the plan tree we intend to
	 * run, but since it's not easy to tell which, we just initialize them
	 * all.
	 */
	Assert(estate->es_subplanstates == NIL);
	foreach(l, parentestate->es_plannedstmt->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(l);
		PlanState  *subplanstate;

		/* eflags == 0: normal execution, no EXPLAIN-only or rewind flags */
		subplanstate = ExecInitNode(subplan, estate, 0);
		estate->es_subplanstates = lappend(estate->es_subplanstates,
										   subplanstate);
	}

	/*
	 * Initialize the private state information for all the nodes in the part
	 * of the plan tree we need to run.  This opens files, allocates storage
	 * and leaves us ready to start processing tuples.
	 */
	epqstate->planstate = ExecInitNode(planTree, estate, 0);

	MemoryContextSwitchTo(oldcontext);
}
|
|
|
|
|
|
|
|
/*
 * EvalPlanQualEnd -- shut down at termination of parent plan state node,
 * or if we are done with the current EPQ child.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).  We do, however, have to close any
 * trigger target relations that got opened, since those are not shared.
 * (There probably shouldn't be any of the latter, but just in case...)
 */
void
EvalPlanQualEnd(EPQState *epqstate)
{
	EState	   *estate = epqstate->estate;
	MemoryContext oldcontext;
	ListCell   *l;

	/* If no es_... state was ever built, there is nothing to tear down */
	if (estate == NULL)
		return;					/* idle, so nothing to do */

	/* Do all cleanup in the EPQ state's own per-query memory context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Shut down the rechecking plan tree built by EvalPlanQualStart */
	ExecEndNode(epqstate->planstate);

	/* Likewise shut down each initialized subplan */
	foreach(l, estate->es_subplanstates)
	{
		PlanState  *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/* throw away the per-estate tuple table */
	ExecResetTupleTable(estate->es_tupleTable, false);

	/* close any trigger target relations attached to this EState */
	ExecCleanUpTriggerState(estate);

	/* Switch back before freeing, since estate owns es_query_cxt */
	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(estate);

	/* Mark EPQState idle */
	epqstate->estate = NULL;
	epqstate->planstate = NULL;
	epqstate->origslot = NULL;
}
|