/*-------------------------------------------------------------------------
 *
 * readfuncs.c
 *	  Reader functions for Postgres tree nodes.
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/nodes/readfuncs.c
 *
 * NOTES
 *	  Path nodes do not have any readfuncs support, because we never
 *	  have occasion to read them in.  (There was once code here that
 *	  claimed to read them, but it was broken as well as unused.)  We
 *	  never read executor state trees, either.
 *
 *	  Parse location fields are written out by outfuncs.c, but only for
 *	  possible debugging use.  When reading a location field, we discard
 *	  the stored value and set the location field to -1 (ie, "unknown").
 *	  This is because nodes coming from a stored rule should not be thought
 *	  to have a known location in the current query's text.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <math.h>

#include "fmgr.h"
#include "nodes/extensible.h"
#include "nodes/parsenodes.h"
#include "nodes/plannodes.h"
#include "nodes/readfuncs.h"
#include "utils/builtins.h"

|
/*
 * Macros to simplify reading of different kinds of fields.  Use these
 * wherever possible to reduce the chance for silly typos.  Note that these
 * hard-wire conventions about the names of the local variables in a Read
 * routine: "local_node" is the node being filled in, and "token"/"length"
 * are the pg_strtok scan state declared by READ_TEMP_LOCALS().
 */

/* Macros for declaring appropriate local variables */

/* A few guys need only local_node */
#define READ_LOCALS_NO_FIELDS(nodeTypeName) \
	nodeTypeName *local_node = makeNode(nodeTypeName)

/* And a few guys need only the pg_strtok support fields */
#define READ_TEMP_LOCALS()	\
	char	   *token;		\
	int			length

/* ... but most need both */
#define READ_LOCALS(nodeTypeName)			\
	READ_LOCALS_NO_FIELDS(nodeTypeName);	\
	READ_TEMP_LOCALS()

/* Read an integer field (anything written as ":fldname %d") */
#define READ_INT_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	local_node->fldname = atoi(token)

/* Read an unsigned integer field (anything written as ":fldname %u") */
#define READ_UINT_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	local_node->fldname = atoui(token)

/* Read an unsigned integer field (anything written using UINT64_FORMAT) */
#define READ_UINT64_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	local_node->fldname = pg_strtouint64(token, NULL, 10)

/* Read a long integer field (anything written as ":fldname %ld") */
#define READ_LONG_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	local_node->fldname = atol(token)

/* Read an OID field (don't hard-wire assumption that OID is same as uint) */
#define READ_OID_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	local_node->fldname = atooid(token)

/* Read a char field (ie, one ascii character) */
#define READ_CHAR_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	/* avoid overhead of calling debackslash() for one char */ \
	local_node->fldname = (length == 0) ? '\0' : (token[0] == '\\' ? token[1] : token[0])

/* Read an enumerated-type field that was written as an integer code */
#define READ_ENUM_FIELD(fldname, enumtype) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	local_node->fldname = (enumtype) atoi(token)

/* Read a float field */
#define READ_FLOAT_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	local_node->fldname = atof(token)

/* Read a boolean field */
#define READ_BOOL_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	local_node->fldname = strtobool(token)

/* Read a character-string field */
#define READ_STRING_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	local_node->fldname = nullable_string(token, length)

/* Read a parse location field (and throw away the value, per notes above) */
#define READ_LOCATION_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	token = pg_strtok(&length);		/* get field value */ \
	(void) token;				/* in case not used elsewhere */ \
	local_node->fldname = -1	/* set field to "unknown" */

/* Read a Node field */
#define READ_NODE_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	(void) token;				/* in case not used elsewhere */ \
	local_node->fldname = nodeRead(NULL, 0)

/* Read a bitmapset field */
#define READ_BITMAPSET_FIELD(fldname) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	(void) token;				/* in case not used elsewhere */ \
	local_node->fldname = _readBitmapset()

/* Read an attribute number array */
#define READ_ATTRNUMBER_ARRAY(fldname, len) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	local_node->fldname = readAttrNumberCols(len);

/* Read an oid array */
#define READ_OID_ARRAY(fldname, len) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	local_node->fldname = readOidCols(len);

/* Read an int array */
#define READ_INT_ARRAY(fldname, len) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	local_node->fldname = readIntCols(len);

/* Read a bool array */
#define READ_BOOL_ARRAY(fldname, len) \
	token = pg_strtok(&length);		/* skip :fldname */ \
	local_node->fldname = readBoolCols(len);

/* Routine exit */
#define READ_DONE() \
	return local_node


/*
 * NOTE: use atoi() to read values written with %d, or atoui() to read
 * values written with %u in outfuncs.c.  An exception is OID values,
 * for which use atooid().  (As of 7.1, outfuncs.c writes OIDs as %u,
 * but this will probably change in the future.)
 */
#define atoui(x)  ((unsigned int) strtoul((x), NULL, 10))

#define strtobool(x)  ((*(x) == 't') ? true : false)

/* A zero-length token represents a NULL string; otherwise undo the
 * backslash-escaping applied by outfuncs.c (result is palloc'd). */
#define nullable_string(token,length)  \
	((length) == 0 ? NULL : debackslash(token, length))
|
2001-01-08 01:31:43 +01:00
|
|
|
|
|
|
|
|
2009-01-22 21:16:10 +01:00
|
|
|
/*
 * _readBitmapset
 *
 * Parse a serialized Bitmapset, written by outfuncs.c as "(b int int ...)",
 * and rebuild it via bms_add_member.  Errors out on malformed input.
 */
static Bitmapset *
_readBitmapset(void)
{
	Bitmapset  *result = NULL;

	READ_TEMP_LOCALS();

	/* expect the opening parenthesis */
	token = pg_strtok(&length);
	if (token == NULL)
		elog(ERROR, "incomplete Bitmapset structure");
	if (length != 1 || token[0] != '(')
		elog(ERROR, "unrecognized token: \"%.*s\"", length, token);

	/* expect the "b" tag identifying a bitmapset */
	token = pg_strtok(&length);
	if (token == NULL)
		elog(ERROR, "incomplete Bitmapset structure");
	if (length != 1 || token[0] != 'b')
		elog(ERROR, "unrecognized token: \"%.*s\"", length, token);

	/* accumulate member integers until the closing parenthesis */
	for (;;)
	{
		int			val;
		char	   *endptr;

		token = pg_strtok(&length);
		if (token == NULL)
			elog(ERROR, "unterminated Bitmapset structure");
		if (length == 1 && token[0] == ')')
			break;
		val = (int) strtol(token, &endptr, 10);
		/* the whole token must be consumed by strtol, else it's garbage */
		if (endptr != token + length)
			elog(ERROR, "unrecognized integer: \"%.*s\"", length, token);
		result = bms_add_member(result, val);
	}

	return result;
}
|
|
|
|
|
Introduce extensible node types.
An extensible node is always tagged T_Extensible, but the extnodename
field identifies it more specifically; it may also include arbitrary
private data. Extensible nodes can be copied, tested for equality,
serialized, and deserialized, but the core system doesn't know
anything about them otherwise. Some extensions may find it useful to
include these nodes in fdw_private or custom_private lists in lieu of
arm-wrestling their data into a format that the core code can
understand.
Along the way, so as not to burden the authors of such extensible
node types too much, expose the functions for writing serialized
tokens, and for serializing and deserializing bitmapsets.
KaiGai Kohei, per a design suggested by me. Reviewed by Andres Freund
and by me, and further edited by me.
2016-02-12 15:31:16 +01:00
|
|
|
/*
 * readBitmapset
 *
 * Exported wrapper around _readBitmapset(),
 * for use by extensions which define extensible nodes.
 */
Bitmapset *
readBitmapset(void)
{
	return _readBitmapset();
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readQuery
 *
 * Deserialize a Query node.  Fields are consumed in exactly the order
 * they were emitted by the corresponding output routine in outfuncs.c;
 * do not reorder these READ_* calls.
 */
static Query *
_readQuery(void)
{
	READ_LOCALS(Query);

	READ_ENUM_FIELD(commandType, CmdType);
	READ_ENUM_FIELD(querySource, QuerySource);
	local_node->queryId = UINT64CONST(0);	/* not saved in output format */
	READ_BOOL_FIELD(canSetTag);
	READ_NODE_FIELD(utilityStmt);
	READ_INT_FIELD(resultRelation);
	READ_BOOL_FIELD(hasAggs);
	READ_BOOL_FIELD(hasWindowFuncs);
	READ_BOOL_FIELD(hasTargetSRFs);
	READ_BOOL_FIELD(hasSubLinks);
	READ_BOOL_FIELD(hasDistinctOn);
	READ_BOOL_FIELD(hasRecursive);
	READ_BOOL_FIELD(hasModifyingCTE);
	READ_BOOL_FIELD(hasForUpdate);
	READ_BOOL_FIELD(hasRowSecurity);
	READ_NODE_FIELD(cteList);
	READ_NODE_FIELD(rtable);
	READ_NODE_FIELD(jointree);
	READ_NODE_FIELD(targetList);
	READ_ENUM_FIELD(override, OverridingKind);
	READ_NODE_FIELD(onConflict);
	READ_NODE_FIELD(returningList);
	READ_NODE_FIELD(groupClause);
	READ_NODE_FIELD(groupingSets);
	READ_NODE_FIELD(havingQual);
	READ_NODE_FIELD(windowClause);
	READ_NODE_FIELD(distinctClause);
	READ_NODE_FIELD(sortClause);
	READ_NODE_FIELD(limitOffset);
	READ_NODE_FIELD(limitCount);
	READ_NODE_FIELD(rowMarks);
	READ_NODE_FIELD(setOperations);
	READ_NODE_FIELD(constraintDeps);
	/* withCheckOptions intentionally omitted, see comment in parsenodes.h */
	READ_LOCATION_FIELD(stmt_location);
	READ_LOCATION_FIELD(stmt_len);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readNotifyStmt
 *
 * Deserialize a NotifyStmt node (NOTIFY channel [, payload]).
 */
static NotifyStmt *
_readNotifyStmt(void)
{
	READ_LOCALS(NotifyStmt);

	READ_STRING_FIELD(conditionname);
	READ_STRING_FIELD(payload);

	READ_DONE();
}
|
|
|
|
|
2003-03-10 04:53:52 +01:00
|
|
|
/*
 * _readDeclareCursorStmt
 *
 * Deserialize a DeclareCursorStmt node.
 */
static DeclareCursorStmt *
_readDeclareCursorStmt(void)
{
	READ_LOCALS(DeclareCursorStmt);

	READ_STRING_FIELD(portalname);
	READ_INT_FIELD(options);	/* bitmask of cursor options */
	READ_NODE_FIELD(query);

	READ_DONE();
}
|
|
|
|
|
2013-07-18 23:10:16 +02:00
|
|
|
/*
 * _readWithCheckOption
 *
 * Deserialize a WithCheckOption node.
 */
static WithCheckOption *
_readWithCheckOption(void)
{
	READ_LOCALS(WithCheckOption);

	READ_ENUM_FIELD(kind, WCOKind);
	READ_STRING_FIELD(relname);
	READ_STRING_FIELD(polname);
	READ_NODE_FIELD(qual);
	READ_BOOL_FIELD(cascaded);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readSortGroupClause
 *
 * Deserialize a SortGroupClause node.
 */
static SortGroupClause *
_readSortGroupClause(void)
{
	READ_LOCALS(SortGroupClause);

	READ_UINT_FIELD(tleSortGroupRef);
	READ_OID_FIELD(eqop);
	READ_OID_FIELD(sortop);
	READ_BOOL_FIELD(nulls_first);
	READ_BOOL_FIELD(hashable);

	READ_DONE();
}
|
|
|
|
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in an earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
/*
 * _readGroupingSet
 *
 * Deserialize a GroupingSet node.  Note that the stored parse location
 * is discarded (set to -1) by READ_LOCATION_FIELD.
 */
static GroupingSet *
_readGroupingSet(void)
{
	READ_LOCALS(GroupingSet);

	READ_ENUM_FIELD(kind, GroupingSetKind);
	READ_NODE_FIELD(content);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2008-12-28 19:54:01 +01:00
|
|
|
/*
 * _readWindowClause
 *
 * Deserialize a WindowClause node.  Field order must match the
 * serialized write order; do not rearrange.
 */
static WindowClause *
_readWindowClause(void)
{
	READ_LOCALS(WindowClause);

	READ_STRING_FIELD(name);
	READ_STRING_FIELD(refname);
	READ_NODE_FIELD(partitionClause);
	READ_NODE_FIELD(orderClause);
	READ_INT_FIELD(frameOptions);
	READ_NODE_FIELD(startOffset);
	READ_NODE_FIELD(endOffset);
	READ_OID_FIELD(startInRangeFunc);
	READ_OID_FIELD(endInRangeFunc);
	READ_OID_FIELD(inRangeColl);
	READ_BOOL_FIELD(inRangeAsc);
	READ_BOOL_FIELD(inRangeNullsFirst);
	READ_UINT_FIELD(winref);
	READ_BOOL_FIELD(copiedOrder);

	READ_DONE();
}
|
|
|
|
|
2006-04-30 20:30:40 +02:00
|
|
|
/*
 * _readRowMarkClause
 *
 * Deserialize a RowMarkClause node (parse-time representation of
 * FOR [KEY] UPDATE/SHARE).  Field order must match outfuncs.c.
 */
static RowMarkClause *
_readRowMarkClause(void)
{
	READ_LOCALS(RowMarkClause);

	READ_UINT_FIELD(rti);
	READ_ENUM_FIELD(strength, LockClauseStrength);
	READ_ENUM_FIELD(waitPolicy, LockWaitPolicy);
	READ_BOOL_FIELD(pushedDown);

	READ_DONE();
}
|
|
|
|
|
2008-10-04 23:56:55 +02:00
|
|
|
/*
 * _readCommonTableExpr
 *
 * Deserialize a CommonTableExpr node (one WITH-clause CTE).
 * Field order must match outfuncs.c.
 */
static CommonTableExpr *
_readCommonTableExpr(void)
{
	READ_LOCALS(CommonTableExpr);

	READ_STRING_FIELD(ctename);
	READ_NODE_FIELD(aliascolnames);
	READ_NODE_FIELD(ctequery);
	READ_LOCATION_FIELD(location);
	READ_BOOL_FIELD(cterecursive);
	READ_INT_FIELD(cterefcount);
	READ_NODE_FIELD(ctecolnames);
	READ_NODE_FIELD(ctecoltypes);
	READ_NODE_FIELD(ctecoltypmods);
	READ_NODE_FIELD(ctecolcollations);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readSetOperationStmt
 *
 * Deserialize a SetOperationStmt node (UNION/INTERSECT/EXCEPT tree).
 * Field order must match outfuncs.c.
 */
static SetOperationStmt *
_readSetOperationStmt(void)
{
	READ_LOCALS(SetOperationStmt);

	READ_ENUM_FIELD(op, SetOperation);
	READ_BOOL_FIELD(all);
	READ_NODE_FIELD(larg);
	READ_NODE_FIELD(rarg);
	/* result-column descriptors, parallel lists */
	READ_NODE_FIELD(colTypes);
	READ_NODE_FIELD(colTypmods);
	READ_NODE_FIELD(colCollations);
	READ_NODE_FIELD(groupClauses);

	READ_DONE();
}
|
2000-10-05 21:11:39 +02:00
|
|
|
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
|
|
|
|
* Stuff from primnodes.h.
|
|
|
|
*/
|
2000-10-05 21:11:39 +02:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * _readAlias
 *
 * Deserialize an Alias node (table/column alias from an AS clause).
 */
static Alias *
_readAlias(void)
{
	READ_LOCALS(Alias);

	READ_STRING_FIELD(aliasname);
	READ_NODE_FIELD(colnames);

	READ_DONE();
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * _readRangeVar
 *
 * Deserialize a RangeVar node (a possibly-qualified relation reference).
 * Note that catalogname is not emitted by outfuncs.c, so it is simply
 * reset here rather than read.
 */
static RangeVar *
_readRangeVar(void)
{
	READ_LOCALS(RangeVar);

	local_node->catalogname = NULL; /* not currently saved in output format */

	READ_STRING_FIELD(schemaname);
	READ_STRING_FIELD(relname);
	READ_BOOL_FIELD(inh);
	READ_CHAR_FIELD(relpersistence);
	READ_NODE_FIELD(alias);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2017-03-08 16:39:37 +01:00
|
|
|
/*
 * _readTableFunc
 *
 * Deserialize a TableFunc node (e.g. XMLTABLE).  Field order must
 * match outfuncs.c.
 */
static TableFunc *
_readTableFunc(void)
{
	READ_LOCALS(TableFunc);

	/* XML namespace URIs and their names, parallel lists */
	READ_NODE_FIELD(ns_uris);
	READ_NODE_FIELD(ns_names);
	READ_NODE_FIELD(docexpr);
	READ_NODE_FIELD(rowexpr);
	/* per-output-column info, parallel lists */
	READ_NODE_FIELD(colnames);
	READ_NODE_FIELD(coltypes);
	READ_NODE_FIELD(coltypmods);
	READ_NODE_FIELD(colcollations);
	READ_NODE_FIELD(colexprs);
	READ_NODE_FIELD(coldefexprs);
	READ_BITMAPSET_FIELD(notnulls);
	READ_INT_FIELD(ordinalitycol);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2007-02-20 18:32:18 +01:00
|
|
|
/*
 * _readIntoClause
 *
 * Deserialize an IntoClause node (target of SELECT INTO / CREATE TABLE AS).
 */
static IntoClause *
_readIntoClause(void)
{
	READ_LOCALS(IntoClause);

	READ_NODE_FIELD(rel);
	READ_NODE_FIELD(colNames);
	READ_NODE_FIELD(options);
	READ_ENUM_FIELD(onCommit, OnCommitAction);
	READ_STRING_FIELD(tableSpaceName);
	/* materialized-view query, stored at parse analysis time */
	READ_NODE_FIELD(viewQuery);
	READ_BOOL_FIELD(skipData);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readVar
 *
 * Deserialize a Var node (reference to a table column).  Field order
 * must match outfuncs.c.
 */
static Var *
_readVar(void)
{
	READ_LOCALS(Var);

	READ_UINT_FIELD(varno);
	READ_INT_FIELD(varattno);
	READ_OID_FIELD(vartype);
	READ_INT_FIELD(vartypmod);
	READ_OID_FIELD(varcollid);
	READ_UINT_FIELD(varlevelsup);
	/* original (pre-rewriting) identity of the variable */
	READ_UINT_FIELD(varnoold);
	READ_INT_FIELD(varoattno);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readConst
 *
 * Deserialize a Const node.  The constvalue field needs special
 * handling: a NULL constant is written as "<>" with no datum, so we
 * consume that token; otherwise readDatum() reconstructs the value
 * using the by-value/by-reference convention recorded in constbyval.
 */
static Const *
_readConst(void)
{
	READ_LOCALS(Const);

	READ_OID_FIELD(consttype);
	READ_INT_FIELD(consttypmod);
	READ_OID_FIELD(constcollid);
	READ_INT_FIELD(constlen);
	READ_BOOL_FIELD(constbyval);
	READ_BOOL_FIELD(constisnull);
	READ_LOCATION_FIELD(location);

	token = pg_strtok(&length); /* skip :constvalue */
	if (local_node->constisnull)
		token = pg_strtok(&length); /* skip "<>" */
	else
		local_node->constvalue = readDatum(local_node->constbyval);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readParam
 *
 * Deserialize a Param node (external or executor-internal parameter).
 */
static Param *
_readParam(void)
{
	READ_LOCALS(Param);

	READ_ENUM_FIELD(paramkind, ParamKind);
	READ_INT_FIELD(paramid);
	READ_OID_FIELD(paramtype);
	READ_INT_FIELD(paramtypmod);
	READ_OID_FIELD(paramcollid);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readAggref
 *
 * Deserialize an Aggref node (an aggregate function call).  Field
 * order must match outfuncs.c.
 */
static Aggref *
_readAggref(void)
{
	READ_LOCALS(Aggref);

	READ_OID_FIELD(aggfnoid);
	READ_OID_FIELD(aggtype);
	READ_OID_FIELD(aggcollid);
	READ_OID_FIELD(inputcollid);
	/* resolved transition datatype, filled in by the planner */
	READ_OID_FIELD(aggtranstype);
	/* user-level argument types (includes any direct arguments) */
	READ_NODE_FIELD(aggargtypes);
	/* direct arguments of an ordered-set (WITHIN GROUP) aggregate */
	READ_NODE_FIELD(aggdirectargs);
	READ_NODE_FIELD(args);
	READ_NODE_FIELD(aggorder);
	READ_NODE_FIELD(aggdistinct);
	READ_NODE_FIELD(aggfilter);
	READ_BOOL_FIELD(aggstar);
	READ_BOOL_FIELD(aggvariadic);
	READ_CHAR_FIELD(aggkind);
	READ_UINT_FIELD(agglevelsup);
	READ_ENUM_FIELD(aggsplit, AggSplit);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in an earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
/*
 * _readGroupingFunc
 *
 * Deserialize a GroupingFunc node (GROUPING() in a grouping-sets query).
 */
static GroupingFunc *
_readGroupingFunc(void)
{
	READ_LOCALS(GroupingFunc);

	READ_NODE_FIELD(args);
	READ_NODE_FIELD(refs);
	READ_NODE_FIELD(cols);
	READ_UINT_FIELD(agglevelsup);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2008-12-28 19:54:01 +01:00
|
|
|
/*
 * _readWindowFunc
 *
 * Deserialize a WindowFunc node (a window function call).  Field
 * order must match outfuncs.c.
 */
static WindowFunc *
_readWindowFunc(void)
{
	READ_LOCALS(WindowFunc);

	READ_OID_FIELD(winfnoid);
	READ_OID_FIELD(wintype);
	READ_OID_FIELD(wincollid);
	READ_OID_FIELD(inputcollid);
	READ_NODE_FIELD(args);
	READ_NODE_FIELD(aggfilter);
	READ_UINT_FIELD(winref);
	READ_BOOL_FIELD(winstar);
	READ_BOOL_FIELD(winagg);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * _readArrayRef
 *
 * Deserialize an ArrayRef node (array subscripting, fetch or assignment).
 */
static ArrayRef *
_readArrayRef(void)
{
	READ_LOCALS(ArrayRef);

	READ_OID_FIELD(refarraytype);
	READ_OID_FIELD(refelemtype);
	READ_INT_FIELD(reftypmod);
	READ_OID_FIELD(refcollid);
	READ_NODE_FIELD(refupperindexpr);
	READ_NODE_FIELD(reflowerindexpr);
	READ_NODE_FIELD(refexpr);
	/* non-NULL only when the ArrayRef represents an assignment */
	READ_NODE_FIELD(refassgnexpr);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readFuncExpr
 *
 * Deserialize a FuncExpr node (a function call expression).  Field
 * order must match outfuncs.c.
 */
static FuncExpr *
_readFuncExpr(void)
{
	READ_LOCALS(FuncExpr);

	READ_OID_FIELD(funcid);
	READ_OID_FIELD(funcresulttype);
	READ_BOOL_FIELD(funcretset);
	READ_BOOL_FIELD(funcvariadic);
	READ_ENUM_FIELD(funcformat, CoercionForm);
	READ_OID_FIELD(funccollid);
	READ_OID_FIELD(inputcollid);
	READ_NODE_FIELD(args);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2009-10-08 04:39:25 +02:00
|
|
|
/*
 * _readNamedArgExpr
 *
 * Deserialize a NamedArgExpr (named function-argument) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static NamedArgExpr *
_readNamedArgExpr(void)
{
	READ_LOCALS(NamedArgExpr);

	READ_NODE_FIELD(arg);
	READ_STRING_FIELD(name);
	READ_INT_FIELD(argnumber);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readOpExpr
 *
 * Deserialize an OpExpr (operator invocation) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static OpExpr *
_readOpExpr(void)
{
	READ_LOCALS(OpExpr);

	READ_OID_FIELD(opno);
	READ_OID_FIELD(opfuncid);
	READ_OID_FIELD(opresulttype);
	READ_BOOL_FIELD(opretset);
	READ_OID_FIELD(opcollid);
	READ_OID_FIELD(inputcollid);
	READ_NODE_FIELD(args);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readDistinctExpr
 *
 * Deserialize a DistinctExpr (IS DISTINCT FROM) node.  Reads the same
 * field set as OpExpr, in the same order as outfuncs.c writes it.
 */
static DistinctExpr *
_readDistinctExpr(void)
{
	READ_LOCALS(DistinctExpr);

	READ_OID_FIELD(opno);
	READ_OID_FIELD(opfuncid);
	READ_OID_FIELD(opresulttype);
	READ_BOOL_FIELD(opretset);
	READ_OID_FIELD(opcollid);
	READ_OID_FIELD(inputcollid);
	READ_NODE_FIELD(args);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readNullIfExpr
 *
 * Deserialize a NullIfExpr (NULLIF expression) node.  Reads the same
 * field set as OpExpr, in the same order as outfuncs.c writes it.
 */
static NullIfExpr *
_readNullIfExpr(void)
{
	READ_LOCALS(NullIfExpr);

	READ_OID_FIELD(opno);
	READ_OID_FIELD(opfuncid);
	READ_OID_FIELD(opresulttype);
	READ_BOOL_FIELD(opretset);
	READ_OID_FIELD(opcollid);
	READ_OID_FIELD(inputcollid);
	READ_NODE_FIELD(args);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2003-06-29 02:33:44 +02:00
|
|
|
/*
 * _readScalarArrayOpExpr
 *
 * Deserialize a ScalarArrayOpExpr ("scalar op ANY/ALL (array)") node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static ScalarArrayOpExpr *
_readScalarArrayOpExpr(void)
{
	READ_LOCALS(ScalarArrayOpExpr);

	READ_OID_FIELD(opno);
	READ_OID_FIELD(opfuncid);
	READ_BOOL_FIELD(useOr);
	READ_OID_FIELD(inputcollid);
	READ_NODE_FIELD(args);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * _readBoolExpr
 *
 * Deserialize a BoolExpr (AND/OR/NOT) node.  The boolop field is stored
 * as a lowercase keyword rather than via READ_ENUM_FIELD, so it is
 * decoded by hand here.
 */
static BoolExpr *
_readBoolExpr(void)
{
	READ_LOCALS(BoolExpr);

	/* do-it-yourself enum representation */
	token = pg_strtok(&length);	/* skip :boolop */
	token = pg_strtok(&length);	/* get field value */
	/*
	 * NB: these are prefix comparisons (strncmp with the keyword length);
	 * any token starting with "and"/"or"/"not" is accepted.
	 */
	if (strncmp(token, "and", 3) == 0)
		local_node->boolop = AND_EXPR;
	else if (strncmp(token, "or", 2) == 0)
		local_node->boolop = OR_EXPR;
	else if (strncmp(token, "not", 3) == 0)
		local_node->boolop = NOT_EXPR;
	else
		elog(ERROR, "unrecognized boolop \"%.*s\"", length, token);

	READ_NODE_FIELD(args);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readSubLink
 *
 * Deserialize a SubLink (sub-select in an expression) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static SubLink *
_readSubLink(void)
{
	READ_LOCALS(SubLink);

	READ_ENUM_FIELD(subLinkType, SubLinkType);
	READ_INT_FIELD(subLinkId);
	READ_NODE_FIELD(testexpr);
	READ_NODE_FIELD(operName);
	READ_NODE_FIELD(subselect);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
|
2002-12-14 01:17:59 +01:00
|
|
|
* _readSubPlan is not needed since it doesn't appear in stored rules.
|
2002-12-12 16:49:42 +01:00
|
|
|
*/
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readFieldSelect
 *
 * Deserialize a FieldSelect (extract a field from a composite value) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static FieldSelect *
_readFieldSelect(void)
{
	READ_LOCALS(FieldSelect);

	READ_NODE_FIELD(arg);
	READ_INT_FIELD(fieldnum);
	READ_OID_FIELD(resulttype);
	READ_INT_FIELD(resulttypmod);
	READ_OID_FIELD(resultcollid);

	READ_DONE();
}
|
|
|
|
|
2004-06-09 21:08:20 +02:00
|
|
|
/*
 * _readFieldStore
 *
 * Deserialize a FieldStore (set fields within a composite value) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static FieldStore *
_readFieldStore(void)
{
	READ_LOCALS(FieldStore);

	READ_NODE_FIELD(arg);
	READ_NODE_FIELD(newvals);
	READ_NODE_FIELD(fieldnums);
	READ_OID_FIELD(resulttype);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readRelabelType
 *
 * Deserialize a RelabelType (binary-compatible type relabeling) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static RelabelType *
_readRelabelType(void)
{
	READ_LOCALS(RelabelType);

	READ_NODE_FIELD(arg);
	READ_OID_FIELD(resulttype);
	READ_INT_FIELD(resulttypmod);
	READ_OID_FIELD(resultcollid);
	READ_ENUM_FIELD(relabelformat, CoercionForm);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2007-06-05 23:31:09 +02:00
|
|
|
/*
 * _readCoerceViaIO
 *
 * Deserialize a CoerceViaIO (coercion through type I/O functions) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static CoerceViaIO *
_readCoerceViaIO(void)
{
	READ_LOCALS(CoerceViaIO);

	READ_NODE_FIELD(arg);
	READ_OID_FIELD(resulttype);
	READ_OID_FIELD(resultcollid);
	READ_ENUM_FIELD(coerceformat, CoercionForm);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2007-03-28 01:21:12 +02:00
|
|
|
/*
 * _readArrayCoerceExpr
 *
 * Deserialize an ArrayCoerceExpr (per-element array coercion) node.
 * elemexpr is the sub-expression applied to each source element.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static ArrayCoerceExpr *
_readArrayCoerceExpr(void)
{
	READ_LOCALS(ArrayCoerceExpr);

	READ_NODE_FIELD(arg);
	READ_NODE_FIELD(elemexpr);
	READ_OID_FIELD(resulttype);
	READ_INT_FIELD(resulttypmod);
	READ_OID_FIELD(resultcollid);
	READ_ENUM_FIELD(coerceformat, CoercionForm);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2004-12-12 00:26:51 +01:00
|
|
|
/*
 * _readConvertRowtypeExpr
 *
 * Deserialize a ConvertRowtypeExpr (row-type conversion) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static ConvertRowtypeExpr *
_readConvertRowtypeExpr(void)
{
	READ_LOCALS(ConvertRowtypeExpr);

	READ_NODE_FIELD(arg);
	READ_OID_FIELD(resulttype);
	READ_ENUM_FIELD(convertformat, CoercionForm);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2011-03-11 22:27:51 +01:00
|
|
|
/*
 * _readCollateExpr
 *
 * Deserialize a CollateExpr (COLLATE clause) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static CollateExpr *
_readCollateExpr(void)
{
	READ_LOCALS(CollateExpr);

	READ_NODE_FIELD(arg);
	READ_OID_FIELD(collOid);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readCaseExpr
 *
 * Deserialize a CaseExpr (CASE expression) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static CaseExpr *
_readCaseExpr(void)
{
	READ_LOCALS(CaseExpr);

	READ_OID_FIELD(casetype);
	READ_OID_FIELD(casecollid);
	READ_NODE_FIELD(arg);
	READ_NODE_FIELD(args);
	READ_NODE_FIELD(defresult);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readCaseWhen
 *
 * Deserialize a CaseWhen (one WHEN arm of a CASE expression) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static CaseWhen *
_readCaseWhen(void)
{
	READ_LOCALS(CaseWhen);

	READ_NODE_FIELD(expr);
	READ_NODE_FIELD(result);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2004-03-17 21:48:43 +01:00
|
|
|
/*
 * _readCaseTestExpr
 *
 * Deserialize a CaseTestExpr (placeholder for a CASE test value) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static CaseTestExpr *
_readCaseTestExpr(void)
{
	READ_LOCALS(CaseTestExpr);

	READ_OID_FIELD(typeId);
	READ_INT_FIELD(typeMod);
	READ_OID_FIELD(collation);

	READ_DONE();
}
|
|
|
|
|
2003-04-09 01:20:04 +02:00
|
|
|
/*
 * _readArrayExpr
 *
 * Deserialize an ArrayExpr (ARRAY[] constructor) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static ArrayExpr *
_readArrayExpr(void)
{
	READ_LOCALS(ArrayExpr);

	READ_OID_FIELD(array_typeid);
	READ_OID_FIELD(array_collid);
	READ_OID_FIELD(element_typeid);
	READ_NODE_FIELD(elements);
	READ_BOOL_FIELD(multidims);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2004-05-11 00:44:49 +02:00
|
|
|
/*
 * _readRowExpr
 *
 * Deserialize a RowExpr (ROW() constructor) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static RowExpr *
_readRowExpr(void)
{
	READ_LOCALS(RowExpr);

	READ_NODE_FIELD(args);
	READ_OID_FIELD(row_typeid);
	READ_ENUM_FIELD(row_format, CoercionForm);
	READ_NODE_FIELD(colnames);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2005-12-28 02:30:02 +01:00
|
|
|
/*
 * _readRowCompareExpr
 *
 * Deserialize a RowCompareExpr (row-wise comparison) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static RowCompareExpr *
_readRowCompareExpr(void)
{
	READ_LOCALS(RowCompareExpr);

	READ_ENUM_FIELD(rctype, RowCompareType);
	READ_NODE_FIELD(opnos);
	READ_NODE_FIELD(opfamilies);
	READ_NODE_FIELD(inputcollids);
	READ_NODE_FIELD(largs);
	READ_NODE_FIELD(rargs);

	READ_DONE();
}
|
|
|
|
|
2003-02-16 03:30:39 +01:00
|
|
|
/*
 * _readCoalesceExpr
 *
 * Deserialize a CoalesceExpr (COALESCE expression) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static CoalesceExpr *
_readCoalesceExpr(void)
{
	READ_LOCALS(CoalesceExpr);

	READ_OID_FIELD(coalescetype);
	READ_OID_FIELD(coalescecollid);
	READ_NODE_FIELD(args);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2005-06-27 00:05:42 +02:00
|
|
|
/*
 * _readMinMaxExpr
 *
 * Deserialize a MinMaxExpr (GREATEST/LEAST expression) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static MinMaxExpr *
_readMinMaxExpr(void)
{
	READ_LOCALS(MinMaxExpr);

	READ_OID_FIELD(minmaxtype);
	READ_OID_FIELD(minmaxcollid);
	READ_OID_FIELD(inputcollid);
	READ_ENUM_FIELD(op, MinMaxOp);
	READ_NODE_FIELD(args);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2016-08-17 02:33:01 +02:00
|
|
|
/*
 * _readSQLValueFunction
 *
 * Deserialize a SQLValueFunction (parameterless SQL-spec function such as
 * CURRENT_DATE) node.  Field order must match outfuncs.c.
 */
static SQLValueFunction *
_readSQLValueFunction(void)
{
	READ_LOCALS(SQLValueFunction);

	READ_ENUM_FIELD(op, SQLValueFunctionOp);
	READ_OID_FIELD(type);
	READ_INT_FIELD(typmod);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2006-12-24 01:29:20 +01:00
|
|
|
/*
 * _readXmlExpr
 *
 * Deserialize an XmlExpr (XML construction/processing expression) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static XmlExpr *
_readXmlExpr(void)
{
	READ_LOCALS(XmlExpr);

	READ_ENUM_FIELD(op, XmlExprOp);
	READ_STRING_FIELD(name);
	READ_NODE_FIELD(named_args);
	READ_NODE_FIELD(arg_names);
	READ_NODE_FIELD(args);
	READ_ENUM_FIELD(xmloption, XmlOptionType);
	READ_OID_FIELD(type);
	READ_INT_FIELD(typmod);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readNullTest
 *
 * Deserialize a NullTest (IS [NOT] NULL test) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static NullTest *
_readNullTest(void)
{
	READ_LOCALS(NullTest);

	READ_NODE_FIELD(arg);
	READ_ENUM_FIELD(nulltesttype, NullTestType);
	READ_BOOL_FIELD(argisrow);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readBooleanTest
 *
 * Deserialize a BooleanTest (IS [NOT] TRUE/FALSE/UNKNOWN test) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static BooleanTest *
_readBooleanTest(void)
{
	READ_LOCALS(BooleanTest);

	READ_NODE_FIELD(arg);
	READ_ENUM_FIELD(booltesttype, BoolTestType);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readCoerceToDomain
 *
 * Deserialize a CoerceToDomain (coercion to a domain type) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static CoerceToDomain *
_readCoerceToDomain(void)
{
	READ_LOCALS(CoerceToDomain);

	READ_NODE_FIELD(arg);
	READ_OID_FIELD(resulttype);
	READ_INT_FIELD(resulttypmod);
	READ_OID_FIELD(resultcollid);
	READ_ENUM_FIELD(coercionformat, CoercionForm);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _readCoerceToDomainValue
 *
 * Deserialize a CoerceToDomainValue (placeholder for the value being
 * coerced within a domain CHECK expression) node.  Field order must
 * match outfuncs.c.
 */
static CoerceToDomainValue *
_readCoerceToDomainValue(void)
{
	READ_LOCALS(CoerceToDomainValue);

	READ_OID_FIELD(typeId);
	READ_INT_FIELD(typeMod);
	READ_OID_FIELD(collation);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2003-07-03 18:34:26 +02:00
|
|
|
/*
 * _readSetToDefault
 *
 * Deserialize a SetToDefault (DEFAULT placeholder in INSERT/UPDATE) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static SetToDefault *
_readSetToDefault(void)
{
	READ_LOCALS(SetToDefault);

	READ_OID_FIELD(typeId);
	READ_INT_FIELD(typeMod);
	READ_OID_FIELD(collation);
	READ_LOCATION_FIELD(location);

	READ_DONE();
}
|
|
|
|
|
2007-06-11 03:16:30 +02:00
|
|
|
/*
 * _readCurrentOfExpr
 *
 * Deserialize a CurrentOfExpr (WHERE CURRENT OF cursor condition) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static CurrentOfExpr *
_readCurrentOfExpr(void)
{
	READ_LOCALS(CurrentOfExpr);

	READ_UINT_FIELD(cvarno);
	READ_STRING_FIELD(cursor_name);
	READ_INT_FIELD(cursor_param);

	READ_DONE();
}
|
|
|
|
|
Code review for NextValueExpr expression node type.
Add missing infrastructure for this node type, notably in ruleutils.c where
its lack could demonstrably cause EXPLAIN to fail. Add outfuncs/readfuncs
support. (outfuncs support is useful today for debugging purposes. The
readfuncs support may never be needed, since at present it would only
matter for parallel query and NextValueExpr should never appear in a
parallelizable query; but it seems like a bad idea to have a primnode type
that isn't fully supported here.) Teach planner infrastructure that
NextValueExpr is a volatile, parallel-unsafe, non-leaky expression node
with cost cpu_operator_cost. Given its limited scope of usage, there
*might* be no live bug today from the lack of that knowledge, but it's
certainly going to bite us on the rear someday. Teach pg_stat_statements
about the new node type, too.
While at it, also teach cost_qual_eval() that MinMaxExpr, SQLValueFunction,
XmlExpr, and CoerceToDomain should be charged as cpu_operator_cost.
Failing to do this for SQLValueFunction was an oversight in my commit
0bb51aa96. The others are longer-standing oversights, but no time like the
present to fix them. (In principle, CoerceToDomain could have cost much
higher than this, but it doesn't presently seem worth trying to examine the
domain's constraints here.)
Modify execExprInterp.c to execute NextValueExpr as an out-of-line
function; it seems quite unlikely to me that it's worth insisting that
it be inlined in all expression eval methods. Besides, providing the
out-of-line function doesn't stop anyone from inlining if they want to.
Adjust some places where NextValueExpr support had been inserted with the
aid of a dartboard rather than keeping it in the same order as elsewhere.
Discussion: https://postgr.es/m/23862.1499981661@sss.pgh.pa.us
2017-07-14 21:25:43 +02:00
|
|
|
/*
 * _readNextValueExpr
 *
 * Deserialize a NextValueExpr (sequence nextval for identity columns) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static NextValueExpr *
_readNextValueExpr(void)
{
	READ_LOCALS(NextValueExpr);

	READ_OID_FIELD(seqid);
	READ_OID_FIELD(typeId);

	READ_DONE();
}
|
|
|
|
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
/*
 * _readInferenceElem
 *
 * Deserialize an InferenceElem (ON CONFLICT unique-index inference
 * element) node.  Field order must match outfuncs.c.
 */
static InferenceElem *
_readInferenceElem(void)
{
	READ_LOCALS(InferenceElem);

	READ_NODE_FIELD(expr);
	READ_OID_FIELD(infercollid);
	READ_OID_FIELD(inferopclass);

	READ_DONE();
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _readTargetEntry
 *
 * Deserialize a TargetEntry (one element of a query's target list) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static TargetEntry *
_readTargetEntry(void)
{
	READ_LOCALS(TargetEntry);

	READ_NODE_FIELD(expr);
	READ_INT_FIELD(resno);
	READ_STRING_FIELD(resname);
	READ_UINT_FIELD(ressortgroupref);
	READ_OID_FIELD(resorigtbl);
	READ_INT_FIELD(resorigcol);
	READ_BOOL_FIELD(resjunk);

	READ_DONE();
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * _readRangeTblRef
 *
 * Deserialize a RangeTblRef (reference to a range-table entry) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static RangeTblRef *
_readRangeTblRef(void)
{
	READ_LOCALS(RangeTblRef);

	READ_INT_FIELD(rtindex);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readJoinExpr
 *
 * Deserialize a JoinExpr (explicit JOIN in a FROM clause) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static JoinExpr *
_readJoinExpr(void)
{
	READ_LOCALS(JoinExpr);

	READ_ENUM_FIELD(jointype, JoinType);
	READ_BOOL_FIELD(isNatural);
	READ_NODE_FIELD(larg);
	READ_NODE_FIELD(rarg);
	READ_NODE_FIELD(usingClause);
	READ_NODE_FIELD(quals);
	READ_NODE_FIELD(alias);
	READ_INT_FIELD(rtindex);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readFromExpr
 *
 * Deserialize a FromExpr (top level of a FROM clause) node.
 * Field order must match the corresponding output routine in outfuncs.c.
 */
static FromExpr *
_readFromExpr(void)
{
	READ_LOCALS(FromExpr);

	READ_NODE_FIELD(fromlist);
	READ_NODE_FIELD(quals);

	READ_DONE();
}
|
|
|
|
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
/*
|
|
|
|
* _readOnConflictExpr
|
|
|
|
*/
|
|
|
|
static OnConflictExpr *
|
|
|
|
_readOnConflictExpr(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(OnConflictExpr);
|
|
|
|
|
|
|
|
READ_ENUM_FIELD(action, OnConflictAction);
|
|
|
|
READ_NODE_FIELD(arbiterElems);
|
|
|
|
READ_NODE_FIELD(arbiterWhere);
|
2015-08-06 02:44:27 +02:00
|
|
|
READ_OID_FIELD(constraint);
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
READ_NODE_FIELD(onConflictSet);
|
|
|
|
READ_NODE_FIELD(onConflictWhere);
|
|
|
|
READ_INT_FIELD(exclRelIndex);
|
|
|
|
READ_NODE_FIELD(exclRelTlist);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
2002-12-12 16:49:42 +01:00
|
|
|
|
Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning. The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.
At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.
This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.
Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
2018-04-06 21:23:04 +02:00
|
|
|
static PartitionPruneStepOp *
|
|
|
|
_readPartitionPruneStepOp(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(PartitionPruneStepOp);
|
|
|
|
|
|
|
|
READ_INT_FIELD(step.step_id);
|
|
|
|
READ_INT_FIELD(opstrategy);
|
|
|
|
READ_NODE_FIELD(exprs);
|
|
|
|
READ_NODE_FIELD(cmpfns);
|
|
|
|
READ_BITMAPSET_FIELD(nullkeys);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
static PartitionPruneStepCombine *
|
|
|
|
_readPartitionPruneStepCombine(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(PartitionPruneStepCombine);
|
|
|
|
|
|
|
|
READ_INT_FIELD(step.step_id);
|
|
|
|
READ_ENUM_FIELD(combineOp, PartitionPruneCombineOp);
|
|
|
|
READ_NODE_FIELD(source_stepids);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
static PartitionPruneInfo *
|
|
|
|
_readPartitionPruneInfo(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(PartitionPruneInfo);
|
|
|
|
|
|
|
|
READ_OID_FIELD(reloid);
|
|
|
|
READ_NODE_FIELD(pruning_steps);
|
|
|
|
READ_BITMAPSET_FIELD(present_parts);
|
|
|
|
READ_INT_FIELD(nparts);
|
2018-06-10 21:22:25 +02:00
|
|
|
READ_INT_FIELD(nexprs);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
READ_INT_ARRAY(subnode_map, local_node->nparts);
|
|
|
|
READ_INT_ARRAY(subpart_map, local_node->nparts);
|
2018-06-10 21:22:25 +02:00
|
|
|
READ_BOOL_ARRAY(hasexecparam, local_node->nexprs);
|
|
|
|
READ_BOOL_FIELD(do_initial_prune);
|
|
|
|
READ_BOOL_FIELD(do_exec_prune);
|
|
|
|
READ_BITMAPSET_FIELD(execparamids);
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
|
|
|
|
* Stuff from parsenodes.h.
|
|
|
|
*/
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
|
|
|
|
* _readRangeTblEntry
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
static RangeTblEntry *
|
2000-10-23 00:15:13 +02:00
|
|
|
_readRangeTblEntry(void)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-11-25 19:12:12 +01:00
|
|
|
READ_LOCALS(RangeTblEntry);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/* put alias + eref first to make dump more legible */
|
|
|
|
READ_NODE_FIELD(alias);
|
|
|
|
READ_NODE_FIELD(eref);
|
|
|
|
READ_ENUM_FIELD(rtekind, RTEKind);
|
2002-03-22 03:56:37 +01:00
|
|
|
|
|
|
|
switch (local_node->rtekind)
|
|
|
|
{
|
|
|
|
case RTE_RELATION:
|
2002-11-25 19:12:12 +01:00
|
|
|
READ_OID_FIELD(relid);
|
2011-02-23 01:23:23 +01:00
|
|
|
READ_CHAR_FIELD(relkind);
|
2015-05-15 20:37:10 +02:00
|
|
|
READ_NODE_FIELD(tablesample);
|
2002-03-22 03:56:37 +01:00
|
|
|
break;
|
|
|
|
case RTE_SUBQUERY:
|
2002-11-25 19:12:12 +01:00
|
|
|
READ_NODE_FIELD(subquery);
|
2011-12-22 22:15:57 +01:00
|
|
|
READ_BOOL_FIELD(security_barrier);
|
2002-03-22 03:56:37 +01:00
|
|
|
break;
|
2008-10-04 23:56:55 +02:00
|
|
|
case RTE_JOIN:
|
|
|
|
READ_ENUM_FIELD(jointype, JoinType);
|
|
|
|
READ_NODE_FIELD(joinaliasvars);
|
|
|
|
break;
|
2002-05-12 22:10:05 +02:00
|
|
|
case RTE_FUNCTION:
|
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
2013-11-22 01:37:02 +01:00
|
|
|
READ_NODE_FIELD(functions);
|
2013-07-29 17:38:01 +02:00
|
|
|
READ_BOOL_FIELD(funcordinality);
|
2002-05-12 22:10:05 +02:00
|
|
|
break;
|
2017-03-08 16:39:37 +01:00
|
|
|
case RTE_TABLEFUNC:
|
|
|
|
READ_NODE_FIELD(tablefunc);
|
|
|
|
break;
|
2006-08-02 03:59:48 +02:00
|
|
|
case RTE_VALUES:
|
|
|
|
READ_NODE_FIELD(values_lists);
|
Fix reporting of column typmods for multi-row VALUES constructs.
expandRTE() and get_rte_attribute_type() reported the exprType() and
exprTypmod() values of the expressions in the first row of the VALUES as
being the column type/typmod returned by the VALUES RTE. That's fine for
the data type, since we coerce all expressions in a column to have the same
common type. But we don't coerce them to have a common typmod, so it was
possible for rows after the first one to return values that violate the
claimed column typmod. This leads to the incorrect result seen in bug
#14448 from Hassan Mahmood, as well as some other corner-case misbehaviors.
The desired behavior is the same as we use in other type-unification
cases: report the common typmod if there is one, but otherwise return -1
indicating no particular constraint. It's cheap for transformValuesClause
to determine the common typmod while transforming a multi-row VALUES, but
it'd be less cheap for expandRTE() and get_rte_attribute_type() to
re-determine that info every time they're asked --- possibly a lot less
cheap, if the VALUES has many rows. Therefore, the best fix is to record
the common typmods explicitly in a list in the VALUES RTE, as we were
already doing for column collations. This looks quite a bit like what
we're doing for CTE RTEs, so we can save a little bit of space and code by
unifying the representation for those two RTE types. They both now share
coltypes/coltypmods/colcollations fields. (At some point it might seem
desirable to populate those fields for all RTE types; but right now it
looks like constructing them for other RTE types would add more code and
cycles than it would save.)
The RTE change requires a catversion bump, so this fix is only usable
in HEAD. If we fix this at all in the back branches, the patch will
need to look quite different.
Report: https://postgr.es/m/20161205143037.4377.60754@wrigleys.postgresql.org
Discussion: https://postgr.es/m/27429.1480968538@sss.pgh.pa.us
2016-12-08 17:40:02 +01:00
|
|
|
READ_NODE_FIELD(coltypes);
|
|
|
|
READ_NODE_FIELD(coltypmods);
|
|
|
|
READ_NODE_FIELD(colcollations);
|
2006-08-02 03:59:48 +02:00
|
|
|
break;
|
2008-10-04 23:56:55 +02:00
|
|
|
case RTE_CTE:
|
|
|
|
READ_STRING_FIELD(ctename);
|
|
|
|
READ_UINT_FIELD(ctelevelsup);
|
|
|
|
READ_BOOL_FIELD(self_reference);
|
Fix reporting of column typmods for multi-row VALUES constructs.
expandRTE() and get_rte_attribute_type() reported the exprType() and
exprTypmod() values of the expressions in the first row of the VALUES as
being the column type/typmod returned by the VALUES RTE. That's fine for
the data type, since we coerce all expressions in a column to have the same
common type. But we don't coerce them to have a common typmod, so it was
possible for rows after the first one to return values that violate the
claimed column typmod. This leads to the incorrect result seen in bug
#14448 from Hassan Mahmood, as well as some other corner-case misbehaviors.
The desired behavior is the same as we use in other type-unification
cases: report the common typmod if there is one, but otherwise return -1
indicating no particular constraint. It's cheap for transformValuesClause
to determine the common typmod while transforming a multi-row VALUES, but
it'd be less cheap for expandRTE() and get_rte_attribute_type() to
re-determine that info every time they're asked --- possibly a lot less
cheap, if the VALUES has many rows. Therefore, the best fix is to record
the common typmods explicitly in a list in the VALUES RTE, as we were
already doing for column collations. This looks quite a bit like what
we're doing for CTE RTEs, so we can save a little bit of space and code by
unifying the representation for those two RTE types. They both now share
coltypes/coltypmods/colcollations fields. (At some point it might seem
desirable to populate those fields for all RTE types; but right now it
looks like constructing them for other RTE types would add more code and
cycles than it would save.)
The RTE change requires a catversion bump, so this fix is only usable
in HEAD. If we fix this at all in the back branches, the patch will
need to look quite different.
Report: https://postgr.es/m/20161205143037.4377.60754@wrigleys.postgresql.org
Discussion: https://postgr.es/m/27429.1480968538@sss.pgh.pa.us
2016-12-08 17:40:02 +01:00
|
|
|
READ_NODE_FIELD(coltypes);
|
|
|
|
READ_NODE_FIELD(coltypmods);
|
|
|
|
READ_NODE_FIELD(colcollations);
|
2002-03-22 03:56:37 +01:00
|
|
|
break;
|
2017-04-01 06:17:18 +02:00
|
|
|
case RTE_NAMEDTUPLESTORE:
|
|
|
|
READ_STRING_FIELD(enrname);
|
2017-06-14 22:19:46 +02:00
|
|
|
READ_FLOAT_FIELD(enrtuples);
|
2017-04-01 06:17:18 +02:00
|
|
|
READ_OID_FIELD(relid);
|
|
|
|
READ_NODE_FIELD(coltypes);
|
|
|
|
READ_NODE_FIELD(coltypmods);
|
|
|
|
READ_NODE_FIELD(colcollations);
|
|
|
|
break;
|
2002-03-22 03:56:37 +01:00
|
|
|
default:
|
2003-07-23 01:30:39 +02:00
|
|
|
elog(ERROR, "unrecognized RTE kind: %d",
|
|
|
|
(int) local_node->rtekind);
|
2002-03-22 03:56:37 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2012-08-08 01:02:54 +02:00
|
|
|
READ_BOOL_FIELD(lateral);
|
2002-11-25 19:12:12 +01:00
|
|
|
READ_BOOL_FIELD(inh);
|
|
|
|
READ_BOOL_FIELD(inFromCl);
|
2004-01-15 00:01:55 +01:00
|
|
|
READ_UINT_FIELD(requiredPerms);
|
2005-06-28 07:09:14 +02:00
|
|
|
READ_OID_FIELD(checkAsUser);
|
2009-01-22 21:16:10 +01:00
|
|
|
READ_BITMAPSET_FIELD(selectedCols);
|
2015-05-08 00:20:46 +02:00
|
|
|
READ_BITMAPSET_FIELD(insertedCols);
|
|
|
|
READ_BITMAPSET_FIELD(updatedCols);
|
2014-04-13 03:04:58 +02:00
|
|
|
READ_NODE_FIELD(securityQuals);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
READ_DONE();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
2013-11-22 01:37:02 +01:00
|
|
|
/*
|
|
|
|
* _readRangeTblFunction
|
|
|
|
*/
|
|
|
|
static RangeTblFunction *
|
|
|
|
_readRangeTblFunction(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(RangeTblFunction);
|
|
|
|
|
|
|
|
READ_NODE_FIELD(funcexpr);
|
|
|
|
READ_INT_FIELD(funccolcount);
|
|
|
|
READ_NODE_FIELD(funccolnames);
|
|
|
|
READ_NODE_FIELD(funccoltypes);
|
|
|
|
READ_NODE_FIELD(funccoltypmods);
|
|
|
|
READ_NODE_FIELD(funccolcollations);
|
|
|
|
READ_BITMAPSET_FIELD(funcparams);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
Redesign tablesample method API, and do extensive code review.
The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a TSM. (Raw inserts into system catalogs are not an acceptable
thing to do, because we can't undo them during DROP EXTENSION, nor will
pg_upgrade behave sanely.) Instead adopt an API more like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.
Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which is undesirable even if it weren't outright
unsafe), and discourage sampling methods from looking at invisible tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.
Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.
Back-patch to 9.5 so that we don't have to support the original API
in production.
2015-07-25 20:39:00 +02:00
|
|
|
/*
|
|
|
|
* _readTableSampleClause
|
|
|
|
*/
|
|
|
|
static TableSampleClause *
|
|
|
|
_readTableSampleClause(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(TableSampleClause);
|
|
|
|
|
|
|
|
READ_OID_FIELD(tsmhandler);
|
|
|
|
READ_NODE_FIELD(args);
|
|
|
|
READ_NODE_FIELD(repeatable);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
2015-09-23 17:51:50 +02:00
|
|
|
/*
|
|
|
|
* _readDefElem
|
|
|
|
*/
|
|
|
|
static DefElem *
|
|
|
|
_readDefElem(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(DefElem);
|
|
|
|
|
|
|
|
READ_STRING_FIELD(defnamespace);
|
|
|
|
READ_STRING_FIELD(defname);
|
|
|
|
READ_NODE_FIELD(arg);
|
|
|
|
READ_ENUM_FIELD(defaction, DefElemAction);
|
2016-09-06 18:00:00 +02:00
|
|
|
READ_LOCATION_FIELD(location);
|
2015-09-23 17:51:50 +02:00
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _readPlannedStmt
|
|
|
|
*/
|
|
|
|
static PlannedStmt *
|
|
|
|
_readPlannedStmt(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(PlannedStmt);
|
|
|
|
|
|
|
|
READ_ENUM_FIELD(commandType, CmdType);
|
2017-10-12 01:52:46 +02:00
|
|
|
READ_UINT64_FIELD(queryId);
|
2015-09-23 17:51:50 +02:00
|
|
|
READ_BOOL_FIELD(hasReturning);
|
|
|
|
READ_BOOL_FIELD(hasModifyingCTE);
|
|
|
|
READ_BOOL_FIELD(canSetTag);
|
|
|
|
READ_BOOL_FIELD(transientPlan);
|
Avoid invalidating all foreign-join cached plans when user mappings change.
We must not push down a foreign join when the foreign tables involved
should be accessed under different user mappings. Previously we tried
to enforce that rule literally during planning, but that meant that the
resulting plans were dependent on the current contents of the
pg_user_mapping catalog, and we had to blow away all cached plans
containing any remote join when anything at all changed in pg_user_mapping.
This could have been improved somewhat, but the fact that a syscache inval
callback has very limited info about what changed made it hard to do better
within that design. Instead, let's change the planner to not consider user
mappings per se, but to allow a foreign join if both RTEs have the same
checkAsUser value. If they do, then they necessarily will use the same
user mapping at runtime, and we don't need to know specifically which one
that is. Post-plan-time changes in pg_user_mapping no longer require any
plan invalidation.
This rule does give up some optimization ability, to wit where two foreign
table references come from views with different owners or one's from a view
and one's directly in the query, but nonetheless the same user mapping
would have applied. We'll sacrifice the first case, but to not regress
more than we have to in the second case, allow a foreign join involving
both zero and nonzero checkAsUser values if the nonzero one is the same as
the prevailing effective userID. In that case, mark the plan as only
runnable by that userID.
The plancache code already had a notion of plans being userID-specific,
in order to support RLS. It was a little confused though, in particular
lacking clarity of thought as to whether it was the rewritten query or just
the finished plan that's dependent on the userID. Rearrange that code so
that it's clearer what depends on which, and so that the same logic applies
to both RLS-injected role dependency and foreign-join-injected role
dependency.
Note that this patch doesn't remove the other issue mentioned in the
original complaint, which is that while we'll reliably stop using a foreign
join if it's disallowed in a new context, we might fail to start using a
foreign join if it's now allowed, but we previously created a generic
cached plan that didn't use one. It was agreed that the chance of winning
that way was not high enough to justify the much larger number of plan
invalidations that would have to occur if we tried to cause it to happen.
In passing, clean up randomly-varying spelling of EXPLAIN commands in
postgres_fdw.sql, and fix a COSTS ON example that had been allowed to
leak into the committed tests.
This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the
previous attempt at ensuring we wouldn't push down foreign joins that
span permissions contexts.
Etsuro Fujita and Tom Lane
Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
|
|
|
READ_BOOL_FIELD(dependsOnRole);
|
|
|
|
READ_BOOL_FIELD(parallelModeNeeded);
|
2018-04-28 22:46:24 +02:00
|
|
|
READ_INT_FIELD(jitFlags);
|
2015-09-23 17:51:50 +02:00
|
|
|
READ_NODE_FIELD(planTree);
|
|
|
|
READ_NODE_FIELD(rtable);
|
|
|
|
READ_NODE_FIELD(resultRelations);
|
2017-03-21 14:48:04 +01:00
|
|
|
READ_NODE_FIELD(nonleafResultRelations);
|
2017-05-01 14:23:01 +02:00
|
|
|
READ_NODE_FIELD(rootResultRelations);
|
2015-09-23 17:51:50 +02:00
|
|
|
READ_NODE_FIELD(subplans);
|
|
|
|
READ_BITMAPSET_FIELD(rewindPlanIDs);
|
|
|
|
READ_NODE_FIELD(rowMarks);
|
|
|
|
READ_NODE_FIELD(relationOids);
|
|
|
|
READ_NODE_FIELD(invalItems);
|
2017-11-13 21:24:12 +01:00
|
|
|
READ_NODE_FIELD(paramExecTypes);
|
Change representation of statement lists, and add statement location info.
This patch makes several changes that improve the consistency of
representation of lists of statements. It's always been the case
that the output of parse analysis is a list of Query nodes, whatever
the types of the individual statements in the list. This patch brings
similar consistency to the outputs of raw parsing and planning steps:
* The output of raw parsing is now always a list of RawStmt nodes;
the statement-type-dependent nodes are one level down from that.
* The output of pg_plan_queries() is now always a list of PlannedStmt
nodes, even for utility statements. In the case of a utility statement,
"planning" just consists of wrapping a CMD_UTILITY PlannedStmt around
the utility node. This list representation is now used in Portal and
CachedPlan plan lists, replacing the former convention of intermixing
PlannedStmts with bare utility-statement nodes.
Now, every list of statements has a consistent head-node type depending
on how far along it is in processing. This allows changing many places
that formerly used generic "Node *" pointers to use a more specific
pointer type, thus reducing the number of IsA() tests and casts needed,
as well as improving code clarity.
Also, the post-parse-analysis representation of DECLARE CURSOR is changed
so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained
SELECT remains a child of the DeclareCursorStmt rather than getting flipped
around to be the other way. It's now true for both Query and PlannedStmt
that utilityStmt is non-null if and only if commandType is CMD_UTILITY.
That allows simplifying a lot of places that were testing both fields.
(I think some of those were just defensive programming, but in many places,
it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.)
Because PlannedStmt carries a canSetTag field, we're also able to get rid
of some ad-hoc rules about how to reconstruct canSetTag for a bare utility
statement; specifically, the assumption that a utility is canSetTag if and
only if it's the only one in its list. While I see no near-term need for
relaxing that restriction, it's nice to get rid of the ad-hocery.
The API of ProcessUtility() is changed so that what it's passed is the
wrapper PlannedStmt not just the bare utility statement. This will affect
all users of ProcessUtility_hook, but the changes are pretty trivial; see
the affected contrib modules for examples of the minimum change needed.
(Most compilers should give pointer-type-mismatch warnings for uncorrected
code.)
There's also a change in the API of ExplainOneQuery_hook, to pass through
cursorOptions instead of expecting hook functions to know what to pick.
This is needed because of the DECLARE CURSOR changes, but really should
have been done in 9.6; it's unlikely that any extant hook functions
know about using CURSOR_OPT_PARALLEL_OK.
Finally, teach gram.y to save statement boundary locations in RawStmt
nodes, and pass those through to Query and PlannedStmt nodes. This allows
more intelligent handling of cases where a source query string contains
multiple statements. This patch doesn't actually do anything with the
information, but a follow-on patch will. (Passing this information through
cleanly is the true motivation for these changes; while I think this is all
good cleanup, it's unlikely we'd have bothered without this end goal.)
catversion bump because addition of location fields to struct Query
affects stored rules.
This patch is by me, but it owes a good deal to Fabien Coelho who did
a lot of preliminary work on the problem, and also reviewed the patch.
Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
|
|
|
READ_NODE_FIELD(utilityStmt);
|
|
|
|
READ_LOCATION_FIELD(stmt_location);
|
|
|
|
READ_LOCATION_FIELD(stmt_len);
|
2015-09-23 17:51:50 +02:00
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * ReadCommonPlan
 *	Assign the basic stuff of all nodes that inherit from Plan
 *
 * Fields must be read in exactly the order they are written by
 * outfuncs.c; the macros consume tokens sequentially from the input.
 */
static void
ReadCommonPlan(Plan *local_node)
{
	READ_TEMP_LOCALS();

	/* cost/size estimates computed by the planner */
	READ_FLOAT_FIELD(startup_cost);
	READ_FLOAT_FIELD(total_cost);
	READ_FLOAT_FIELD(plan_rows);
	READ_INT_FIELD(plan_width);
	/* parallel-query properties of this plan node */
	READ_BOOL_FIELD(parallel_aware);
	READ_BOOL_FIELD(parallel_safe);
	READ_INT_FIELD(plan_node_id);
	/* node's expression trees and child plans */
	READ_NODE_FIELD(targetlist);
	READ_NODE_FIELD(qual);
	READ_NODE_FIELD(lefttree);
	READ_NODE_FIELD(righttree);
	READ_NODE_FIELD(initPlan);
	/* sets of PARAM_EXEC params this node depends on / provides */
	READ_BITMAPSET_FIELD(extParam);
	READ_BITMAPSET_FIELD(allParam);
}
|
|
|
|
|
|
|
|
/*
 * _readPlan
 *	Deserialize a bare Plan node (no fields beyond the common ones).
 */
static Plan *
_readPlan(void)
{
	READ_LOCALS_NO_FIELDS(Plan);

	ReadCommonPlan(local_node);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readResult
 *	Deserialize a Result node: common Plan fields plus the
 *	gating constant qual, if any.
 */
static Result *
_readResult(void)
{
	READ_LOCALS(Result);

	ReadCommonPlan(&local_node->plan);

	READ_NODE_FIELD(resconstantqual);

	READ_DONE();
}
|
|
|
|
|
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs_ in the targetlist (like SELECT
generate_series(1,5)) so far was done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of return values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would allow to retain the old
"least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided to instead only return rows till
all SRFs are exhausted, returning NULL for already exhausted ones. We
deemed the previous behavior to be too confusing, unexpected and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set returning
arguments to a function, is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed,
returning multiple rows from the expression, even when the SRF containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of, now superfluous, code dealing with set return expressions
around. But as the changes to get rid of those are verbose largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
2017-01-18 21:46:50 +01:00
|
|
|
/*
 * _readProjectSet
 *	Deserialize a ProjectSet node (evaluates set-returning functions
 *	in its targetlist); it has no fields of its own.
 */
static ProjectSet *
_readProjectSet(void)
{
	READ_LOCALS_NO_FIELDS(ProjectSet);

	ReadCommonPlan(&local_node->plan);

	READ_DONE();
}
|
|
|
|
|
2015-09-23 17:51:50 +02:00
|
|
|
/*
 * _readModifyTable
 *	Deserialize a ModifyTable node (INSERT/UPDATE/DELETE).
 *
 * Field order must match _outModifyTable in outfuncs.c.
 */
static ModifyTable *
_readModifyTable(void)
{
	READ_LOCALS(ModifyTable);

	ReadCommonPlan(&local_node->plan);

	READ_ENUM_FIELD(operation, CmdType);
	READ_BOOL_FIELD(canSetTag);
	READ_UINT_FIELD(nominalRelation);
	/* partitioning info for the target relation tree */
	READ_NODE_FIELD(partitioned_rels);
	READ_BOOL_FIELD(partColsUpdated);
	/* per-result-relation lists; all parallel to resultRelations */
	READ_NODE_FIELD(resultRelations);
	READ_INT_FIELD(resultRelIndex);
	READ_INT_FIELD(rootResultRelIndex);
	READ_NODE_FIELD(plans);
	READ_NODE_FIELD(withCheckOptionLists);
	READ_NODE_FIELD(returningLists);
	/* foreign-data-wrapper support */
	READ_NODE_FIELD(fdwPrivLists);
	READ_BITMAPSET_FIELD(fdwDirectModifyPlans);
	READ_NODE_FIELD(rowMarks);
	READ_INT_FIELD(epqParam);
	/* ON CONFLICT clause information */
	READ_ENUM_FIELD(onConflictAction, OnConflictAction);
	READ_NODE_FIELD(arbiterIndexes);
	READ_NODE_FIELD(onConflictSet);
	READ_NODE_FIELD(onConflictWhere);
	READ_UINT_FIELD(exclRelRTI);
	READ_NODE_FIELD(exclRelTlist);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readAppend
 *	Deserialize an Append node (concatenation of subplan outputs).
 */
static Append *
_readAppend(void)
{
	READ_LOCALS(Append);

	ReadCommonPlan(&local_node->plan);

	READ_NODE_FIELD(partitioned_rels);
	READ_NODE_FIELD(appendplans);
	/* index of first subplan that is only safe for parallel workers */
	READ_INT_FIELD(first_partial_plan);
	/* run-time partition pruning data, if any */
	READ_NODE_FIELD(part_prune_infos);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readMergeAppend
 *	Deserialize a MergeAppend node.  The sort-key arrays all have
 *	numCols entries, so numCols must be read before them.
 */
static MergeAppend *
_readMergeAppend(void)
{
	READ_LOCALS(MergeAppend);

	ReadCommonPlan(&local_node->plan);

	READ_NODE_FIELD(partitioned_rels);
	READ_NODE_FIELD(mergeplans);
	READ_INT_FIELD(numCols);
	READ_ATTRNUMBER_ARRAY(sortColIdx, local_node->numCols);
	READ_OID_ARRAY(sortOperators, local_node->numCols);
	READ_OID_ARRAY(collations, local_node->numCols);
	READ_BOOL_ARRAY(nullsFirst, local_node->numCols);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readRecursiveUnion
 *	Deserialize a RecursiveUnion node (implements WITH RECURSIVE).
 */
static RecursiveUnion *
_readRecursiveUnion(void)
{
	READ_LOCALS(RecursiveUnion);

	ReadCommonPlan(&local_node->plan);

	/* ID of the working-table param shared with WorkTableScan */
	READ_INT_FIELD(wtParam);
	/* duplicate-elimination columns; arrays sized by numCols */
	READ_INT_FIELD(numCols);
	READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
	READ_OID_ARRAY(dupOperators, local_node->numCols);
	READ_LONG_FIELD(numGroups);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readBitmapAnd
 *	Deserialize a BitmapAnd node (ANDs the bitmaps of its subplans).
 */
static BitmapAnd *
_readBitmapAnd(void)
{
	READ_LOCALS(BitmapAnd);

	ReadCommonPlan(&local_node->plan);

	READ_NODE_FIELD(bitmapplans);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readBitmapOr
 *	Deserialize a BitmapOr node (ORs the bitmaps of its subplans).
 */
static BitmapOr *
_readBitmapOr(void)
{
	READ_LOCALS(BitmapOr);

	ReadCommonPlan(&local_node->plan);

	/* true if the result bitmap lives in shared memory (parallel scan) */
	READ_BOOL_FIELD(isshared);
	READ_NODE_FIELD(bitmapplans);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * ReadCommonScan
 *	Assign the basic stuff of all nodes that inherit from Scan
 */
static void
ReadCommonScan(Scan *local_node)
{
	READ_TEMP_LOCALS();

	ReadCommonPlan(&local_node->plan);

	/* range-table index of the relation being scanned */
	READ_UINT_FIELD(scanrelid);
}
|
|
|
|
|
|
|
|
/*
 * _readScan
 *	Deserialize a bare Scan node (no fields beyond the common ones).
 */
static Scan *
_readScan(void)
{
	READ_LOCALS_NO_FIELDS(Scan);

	ReadCommonScan(local_node);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readSeqScan
 *	Deserialize a SeqScan node; it adds nothing to the common
 *	Scan fields.
 */
static SeqScan *
_readSeqScan(void)
{
	READ_LOCALS_NO_FIELDS(SeqScan);

	ReadCommonScan(local_node);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readSampleScan
 *	Deserialize a SampleScan node (TABLESAMPLE scan).
 */
static SampleScan *
_readSampleScan(void)
{
	READ_LOCALS(SampleScan);

	ReadCommonScan(&local_node->scan);

	/* the TABLESAMPLE clause's method and arguments */
	READ_NODE_FIELD(tablesample);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readIndexScan
 *	Deserialize an IndexScan node.
 */
static IndexScan *
_readIndexScan(void)
{
	READ_LOCALS(IndexScan);

	ReadCommonScan(&local_node->scan);

	READ_OID_FIELD(indexid);
	/* index quals in both index-attno form and original form */
	READ_NODE_FIELD(indexqual);
	READ_NODE_FIELD(indexqualorig);
	READ_NODE_FIELD(indexorderby);
	READ_NODE_FIELD(indexorderbyorig);
	READ_NODE_FIELD(indexorderbyops);
	READ_ENUM_FIELD(indexorderdir, ScanDirection);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readIndexOnlyScan
 *	Deserialize an IndexOnlyScan node.
 */
static IndexOnlyScan *
_readIndexOnlyScan(void)
{
	READ_LOCALS(IndexOnlyScan);

	ReadCommonScan(&local_node->scan);

	READ_OID_FIELD(indexid);
	READ_NODE_FIELD(indexqual);
	READ_NODE_FIELD(indexorderby);
	/* targetlist describing the index columns returned by the scan */
	READ_NODE_FIELD(indextlist);
	READ_ENUM_FIELD(indexorderdir, ScanDirection);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readBitmapIndexScan
 *	Deserialize a BitmapIndexScan node (produces a TID bitmap,
 *	not tuples).
 */
static BitmapIndexScan *
_readBitmapIndexScan(void)
{
	READ_LOCALS(BitmapIndexScan);

	ReadCommonScan(&local_node->scan);

	READ_OID_FIELD(indexid);
	/* true if the bitmap lives in shared memory (parallel scan) */
	READ_BOOL_FIELD(isshared);
	READ_NODE_FIELD(indexqual);
	READ_NODE_FIELD(indexqualorig);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readBitmapHeapScan
 *	Deserialize a BitmapHeapScan node (fetches heap tuples identified
 *	by a bitmap built below it).
 */
static BitmapHeapScan *
_readBitmapHeapScan(void)
{
	READ_LOCALS(BitmapHeapScan);

	ReadCommonScan(&local_node->scan);

	/* original form of the quals, for rechecking lossy pages */
	READ_NODE_FIELD(bitmapqualorig);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readTidScan
 *	Deserialize a TidScan node (scan by tuple CTID).
 */
static TidScan *
_readTidScan(void)
{
	READ_LOCALS(TidScan);

	ReadCommonScan(&local_node->scan);

	READ_NODE_FIELD(tidquals);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readSubqueryScan
 *	Deserialize a SubqueryScan node, including its sub-Plan tree.
 */
static SubqueryScan *
_readSubqueryScan(void)
{
	READ_LOCALS(SubqueryScan);

	ReadCommonScan(&local_node->scan);

	READ_NODE_FIELD(subplan);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readFunctionScan
 *	Deserialize a FunctionScan node (scan over set-returning
 *	function results).
 */
static FunctionScan *
_readFunctionScan(void)
{
	READ_LOCALS(FunctionScan);

	ReadCommonScan(&local_node->scan);

	READ_NODE_FIELD(functions);
	/* true if WITH ORDINALITY was specified */
	READ_BOOL_FIELD(funcordinality);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readValuesScan
 *	Deserialize a ValuesScan node (scan over a VALUES list).
 */
static ValuesScan *
_readValuesScan(void)
{
	READ_LOCALS(ValuesScan);

	ReadCommonScan(&local_node->scan);

	READ_NODE_FIELD(values_lists);

	READ_DONE();
}
|
|
|
|
|
2017-03-08 16:39:37 +01:00
|
|
|
/*
 * _readTableFuncScan
 *	Deserialize a TableFuncScan node (e.g. XMLTABLE).
 */
static TableFuncScan *
_readTableFuncScan(void)
{
	READ_LOCALS(TableFuncScan);

	ReadCommonScan(&local_node->scan);

	READ_NODE_FIELD(tablefunc);

	READ_DONE();
}
|
|
|
|
|
2015-09-23 17:51:50 +02:00
|
|
|
/*
 * _readCteScan
 *	Deserialize a CteScan node (scan over a common table expression).
 */
static CteScan *
_readCteScan(void)
{
	READ_LOCALS(CteScan);

	ReadCommonScan(&local_node->scan);

	/* ID of the init-plan implementing the CTE, and its output param */
	READ_INT_FIELD(ctePlanId);
	READ_INT_FIELD(cteParam);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readWorkTableScan
 *	Deserialize a WorkTableScan node (scans the working table of a
 *	RecursiveUnion, identified by the shared wtParam).
 */
static WorkTableScan *
_readWorkTableScan(void)
{
	READ_LOCALS(WorkTableScan);

	ReadCommonScan(&local_node->scan);

	READ_INT_FIELD(wtParam);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readForeignScan
 *	Deserialize a ForeignScan node (FDW scan or direct modify).
 */
static ForeignScan *
_readForeignScan(void)
{
	READ_LOCALS(ForeignScan);

	ReadCommonScan(&local_node->scan);

	/* CMD_SELECT for plain scans; other values for direct modification */
	READ_ENUM_FIELD(operation, CmdType);
	READ_OID_FIELD(fs_server);
	/* FDW-supplied expressions and private state */
	READ_NODE_FIELD(fdw_exprs);
	READ_NODE_FIELD(fdw_private);
	READ_NODE_FIELD(fdw_scan_tlist);
	READ_NODE_FIELD(fdw_recheck_quals);
	READ_BITMAPSET_FIELD(fs_relids);
	/* true if any system column is requested from the foreign rel */
	READ_BOOL_FIELD(fsSystemCol);

	READ_DONE();
}
|
|
|
|
|
2015-11-12 13:40:31 +01:00
|
|
|
/*
 * _readCustomScan
 *	Deserialize a CustomScan node.  The methods pointer cannot be
 *	serialized directly, so outfuncs.c writes the provider's name and
 *	we look the methods struct back up here by that name.
 */
static CustomScan *
_readCustomScan(void)
{
	READ_LOCALS(CustomScan);
	char	   *custom_name;
	const CustomScanMethods *methods;

	ReadCommonScan(&local_node->scan);

	READ_UINT_FIELD(flags);
	READ_NODE_FIELD(custom_plans);
	READ_NODE_FIELD(custom_exprs);
	READ_NODE_FIELD(custom_private);
	READ_NODE_FIELD(custom_scan_tlist);
	READ_BITMAPSET_FIELD(custom_relids);

	/* Lookup CustomScanMethods by CustomName */
	token = pg_strtok(&length); /* skip methods: */
	token = pg_strtok(&length); /* CustomName */
	custom_name = nullable_string(token, length);
	/* second arg false => error out if the provider is not registered */
	methods = GetCustomScanMethods(custom_name, false);
	local_node->methods = methods;

	READ_DONE();
}
|
|
|
|
|
2015-09-23 17:51:50 +02:00
|
|
|
/*
 * ReadCommonJoin
 *	Assign the basic stuff of all nodes that inherit from Join
 */
static void
ReadCommonJoin(Join *local_node)
{
	READ_TEMP_LOCALS();

	ReadCommonPlan(&local_node->plan);

	READ_ENUM_FIELD(jointype, JoinType);
	/* true if each outer row can match at most one inner row */
	READ_BOOL_FIELD(inner_unique);
	READ_NODE_FIELD(joinqual);
}
|
|
|
|
|
|
|
|
/*
 * _readJoin
 *	Deserialize a bare Join node (no fields beyond the common ones).
 */
static Join *
_readJoin(void)
{
	READ_LOCALS_NO_FIELDS(Join);

	ReadCommonJoin(local_node);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readNestLoop
 *	Deserialize a NestLoop join node.
 */
static NestLoop *
_readNestLoop(void)
{
	READ_LOCALS(NestLoop);

	ReadCommonJoin(&local_node->join);

	/* params passed from outer to parameterized inner side */
	READ_NODE_FIELD(nestParams);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readMergeJoin
 *	Deserialize a MergeJoin node.  The per-clause arrays are not
 *	preceded by an explicit count, so their length is derived from
 *	the length of the mergeclauses list.
 */
static MergeJoin *
_readMergeJoin(void)
{
	int			numCols;

	READ_LOCALS(MergeJoin);

	ReadCommonJoin(&local_node->join);

	/* true if mark/restore of inner side can be skipped */
	READ_BOOL_FIELD(skip_mark_restore);
	READ_NODE_FIELD(mergeclauses);

	/* array sizes follow from the clause list just read */
	numCols = list_length(local_node->mergeclauses);

	READ_OID_ARRAY(mergeFamilies, numCols);
	READ_OID_ARRAY(mergeCollations, numCols);
	READ_INT_ARRAY(mergeStrategies, numCols);
	READ_BOOL_ARRAY(mergeNullsFirst, numCols);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readHashJoin
 *	Deserialize a HashJoin node.
 */
static HashJoin *
_readHashJoin(void)
{
	READ_LOCALS(HashJoin);

	ReadCommonJoin(&local_node->join);

	READ_NODE_FIELD(hashclauses);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readMaterial
 *	Deserialize a Material node; it adds nothing to the common
 *	Plan fields.
 */
static Material *
_readMaterial(void)
{
	READ_LOCALS_NO_FIELDS(Material);

	ReadCommonPlan(&local_node->plan);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readSort
 *	Deserialize a Sort node; sort-key arrays are sized by numCols,
 *	which therefore must be read first.
 */
static Sort *
_readSort(void)
{
	READ_LOCALS(Sort);

	ReadCommonPlan(&local_node->plan);

	READ_INT_FIELD(numCols);
	READ_ATTRNUMBER_ARRAY(sortColIdx, local_node->numCols);
	READ_OID_ARRAY(sortOperators, local_node->numCols);
	READ_OID_ARRAY(collations, local_node->numCols);
	READ_BOOL_ARRAY(nullsFirst, local_node->numCols);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readGroup
 *	Deserialize a Group node (grouping of presorted input).
 */
static Group *
_readGroup(void)
{
	READ_LOCALS(Group);

	ReadCommonPlan(&local_node->plan);

	READ_INT_FIELD(numCols);
	READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
	READ_OID_ARRAY(grpOperators, local_node->numCols);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readAgg
 *	Deserialize an Agg node.
 */
static Agg *
_readAgg(void)
{
	READ_LOCALS(Agg);

	ReadCommonPlan(&local_node->plan);

	READ_ENUM_FIELD(aggstrategy, AggStrategy);
	/* which phase of partial/final aggregation this node performs */
	READ_ENUM_FIELD(aggsplit, AggSplit);
	READ_INT_FIELD(numCols);
	READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
	READ_OID_ARRAY(grpOperators, local_node->numCols);
	READ_LONG_FIELD(numGroups);
	READ_BITMAPSET_FIELD(aggParams);
	/* GROUPING SETS support: grouping-set definitions and chained nodes */
	READ_NODE_FIELD(groupingSets);
	READ_NODE_FIELD(chain);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readWindowAgg
 *	Deserialize a WindowAgg node.
 */
static WindowAgg *
_readWindowAgg(void)
{
	READ_LOCALS(WindowAgg);

	ReadCommonPlan(&local_node->plan);

	READ_UINT_FIELD(winref);
	/* PARTITION BY columns; arrays sized by partNumCols */
	READ_INT_FIELD(partNumCols);
	READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols);
	READ_OID_ARRAY(partOperators, local_node->partNumCols);
	/* ORDER BY columns; arrays sized by ordNumCols */
	READ_INT_FIELD(ordNumCols);
	READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols);
	READ_OID_ARRAY(ordOperators, local_node->ordNumCols);
	/* frame clause: options bitmask plus offset expressions */
	READ_INT_FIELD(frameOptions);
	READ_NODE_FIELD(startOffset);
	READ_NODE_FIELD(endOffset);
	/* in_range support functions for RANGE offset PRECEDING/FOLLOWING */
	READ_OID_FIELD(startInRangeFunc);
	READ_OID_FIELD(endInRangeFunc);
	READ_OID_FIELD(inRangeColl);
	READ_BOOL_FIELD(inRangeAsc);
	READ_BOOL_FIELD(inRangeNullsFirst);

	READ_DONE();
}
|
|
|
|
|
|
|
|
/*
 * _readUnique
 *	Deserialize a Unique node (duplicate removal over sorted input).
 */
static Unique *
_readUnique(void)
{
	READ_LOCALS(Unique);

	ReadCommonPlan(&local_node->plan);

	READ_INT_FIELD(numCols);
	READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols);
	READ_OID_ARRAY(uniqOperators, local_node->numCols);

	READ_DONE();
}
|
|
|
|
|
2015-10-01 15:15:36 +02:00
|
|
|
/*
|
|
|
|
* _readGather
|
|
|
|
*/
|
|
|
|
static Gather *
|
|
|
|
_readGather(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(Gather);
|
|
|
|
|
|
|
|
ReadCommonPlan(&local_node->plan);
|
|
|
|
|
|
|
|
READ_INT_FIELD(num_workers);
|
Force rescanning of parallel-aware scan nodes below a Gather[Merge].
The ExecReScan machinery contains various optimizations for postponing
or skipping rescans of plan subtrees; for example a HashAgg node may
conclude that it can re-use the table it built before, instead of
re-reading its input subtree. But that is wrong if the input contains
a parallel-aware table scan node, since the portion of the table scanned
by the leader process is likely to vary from one rescan to the next.
This explains the timing-dependent buildfarm failures we saw after
commit a2b70c89c.
The established mechanism for showing that a plan node's output is
potentially variable is to mark it as depending on some runtime Param.
Hence, to fix this, invent a dummy Param (one that has a PARAM_EXEC
parameter number, but carries no actual value) associated with each Gather
or GatherMerge node, mark parallel-aware nodes below that node as dependent
on that Param, and arrange for ExecReScanGather[Merge] to flag that Param
as changed whenever the Gather[Merge] node is rescanned.
This solution breaks an undocumented assumption made by the parallel
executor logic, namely that all rescans of nodes below a Gather[Merge]
will happen synchronously during the ReScan of the top node itself.
But that's fundamentally contrary to the design of the ExecReScan code,
and so was doomed to fail someday anyway (even if you want to argue
that the bug being fixed here wasn't a failure of that assumption).
A follow-on patch will address that issue. In the meantime, the worst
that's expected to happen is that given very bad timing luck, the leader
might have to do all the work during a rescan, because workers think
they have nothing to do, if they are able to start up before the eventual
ReScan of the leader's parallel-aware table scan node has reset the
shared scan state.
Although this problem exists in 9.6, there does not seem to be any way
for it to manifest there. Without GatherMerge, it seems that a plan tree
that has a rescan-short-circuiting node below Gather will always also
have one above it that will short-circuit in the same cases, preventing
the Gather from being rescanned. Hence we won't take the risk of
back-patching this change into 9.6. But v10 needs it.
Discussion: https://postgr.es/m/CAA4eK1JkByysFJNh9M349u_nNjqETuEnY_y1VUc_kJiU0bxtaQ@mail.gmail.com
2017-08-30 15:29:55 +02:00
|
|
|
READ_INT_FIELD(rescan_param);
|
2015-10-01 15:15:36 +02:00
|
|
|
READ_BOOL_FIELD(single_copy);
|
2016-02-07 17:39:22 +01:00
|
|
|
READ_BOOL_FIELD(invisible);
|
2017-11-16 18:06:14 +01:00
|
|
|
READ_BITMAPSET_FIELD(initParam);
|
2015-10-01 15:15:36 +02:00
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
2017-03-09 13:40:36 +01:00
|
|
|
/*
|
|
|
|
* _readGatherMerge
|
|
|
|
*/
|
|
|
|
static GatherMerge *
|
|
|
|
_readGatherMerge(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(GatherMerge);
|
|
|
|
|
|
|
|
ReadCommonPlan(&local_node->plan);
|
|
|
|
|
|
|
|
READ_INT_FIELD(num_workers);
|
Force rescanning of parallel-aware scan nodes below a Gather[Merge].
The ExecReScan machinery contains various optimizations for postponing
or skipping rescans of plan subtrees; for example a HashAgg node may
conclude that it can re-use the table it built before, instead of
re-reading its input subtree. But that is wrong if the input contains
a parallel-aware table scan node, since the portion of the table scanned
by the leader process is likely to vary from one rescan to the next.
This explains the timing-dependent buildfarm failures we saw after
commit a2b70c89c.
The established mechanism for showing that a plan node's output is
potentially variable is to mark it as depending on some runtime Param.
Hence, to fix this, invent a dummy Param (one that has a PARAM_EXEC
parameter number, but carries no actual value) associated with each Gather
or GatherMerge node, mark parallel-aware nodes below that node as dependent
on that Param, and arrange for ExecReScanGather[Merge] to flag that Param
as changed whenever the Gather[Merge] node is rescanned.
This solution breaks an undocumented assumption made by the parallel
executor logic, namely that all rescans of nodes below a Gather[Merge]
will happen synchronously during the ReScan of the top node itself.
But that's fundamentally contrary to the design of the ExecReScan code,
and so was doomed to fail someday anyway (even if you want to argue
that the bug being fixed here wasn't a failure of that assumption).
A follow-on patch will address that issue. In the meantime, the worst
that's expected to happen is that given very bad timing luck, the leader
might have to do all the work during a rescan, because workers think
they have nothing to do, if they are able to start up before the eventual
ReScan of the leader's parallel-aware table scan node has reset the
shared scan state.
Although this problem exists in 9.6, there does not seem to be any way
for it to manifest there. Without GatherMerge, it seems that a plan tree
that has a rescan-short-circuiting node below Gather will always also
have one above it that will short-circuit in the same cases, preventing
the Gather from being rescanned. Hence we won't take the risk of
back-patching this change into 9.6. But v10 needs it.
Discussion: https://postgr.es/m/CAA4eK1JkByysFJNh9M349u_nNjqETuEnY_y1VUc_kJiU0bxtaQ@mail.gmail.com
2017-08-30 15:29:55 +02:00
|
|
|
READ_INT_FIELD(rescan_param);
|
2017-03-09 13:40:36 +01:00
|
|
|
READ_INT_FIELD(numCols);
|
|
|
|
READ_ATTRNUMBER_ARRAY(sortColIdx, local_node->numCols);
|
|
|
|
READ_OID_ARRAY(sortOperators, local_node->numCols);
|
|
|
|
READ_OID_ARRAY(collations, local_node->numCols);
|
|
|
|
READ_BOOL_ARRAY(nullsFirst, local_node->numCols);
|
2017-11-16 18:06:14 +01:00
|
|
|
READ_BITMAPSET_FIELD(initParam);
|
2017-03-09 13:40:36 +01:00
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
2015-09-23 17:51:50 +02:00
|
|
|
/*
|
|
|
|
* _readHash
|
|
|
|
*/
|
|
|
|
static Hash *
|
|
|
|
_readHash(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(Hash);
|
|
|
|
|
|
|
|
ReadCommonPlan(&local_node->plan);
|
|
|
|
|
|
|
|
READ_OID_FIELD(skewTable);
|
|
|
|
READ_INT_FIELD(skewColumn);
|
|
|
|
READ_BOOL_FIELD(skewInherit);
|
Add parallel-aware hash joins.
Introduce parallel-aware hash joins that appear in EXPLAIN plans as Parallel
Hash Join with Parallel Hash. While hash joins could already appear in
parallel queries, they were previously always parallel-oblivious and had a
partial subplan only on the outer side, meaning that the work of the inner
subplan was duplicated in every worker.
After this commit, the planner will consider using a partial subplan on the
inner side too, using the Parallel Hash node to divide the work over the
available CPU cores and combine its results in shared memory. If the join
needs to be split into multiple batches in order to respect work_mem, then
workers process different batches as much as possible and then work together
on the remaining batches.
The advantages of a parallel-aware hash join over a parallel-oblivious hash
join used in a parallel query are that it:
* avoids wasting memory on duplicated hash tables
* avoids wasting disk space on duplicated batch files
* divides the work of building the hash table over the CPUs
One disadvantage is that there is some communication between the participating
CPUs which might outweigh the benefits of parallelism in the case of small
hash tables. This is avoided by the planner's existing reluctance to supply
partial plans for small scans, but it may be necessary to estimate
synchronization costs in future if that situation changes. Another is that
outer batch 0 must be written to disk if multiple batches are required.
A potential future advantage of parallel-aware hash joins is that right and
full outer joins could be supported, since there is a single set of matched
bits for each hashtable, but that is not yet implemented.
A new GUC enable_parallel_hash is defined to control the feature, defaulting
to on.
Author: Thomas Munro
Reviewed-By: Andres Freund, Robert Haas
Tested-By: Rafia Sabih, Prabhat Sahu
Discussion:
https://postgr.es/m/CAEepm=2W=cOkiZxcg6qiFQP-dHUe09aqTrEMM7yJDrHMhDv_RA@mail.gmail.com
https://postgr.es/m/CAEepm=37HKyJ4U6XOLi=JgfSHM3o6B-GaeO-6hkOmneTDkH+Uw@mail.gmail.com
2017-12-21 08:39:21 +01:00
|
|
|
READ_FLOAT_FIELD(rows_total);
|
2015-09-23 17:51:50 +02:00
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _readSetOp
|
|
|
|
*/
|
|
|
|
static SetOp *
|
|
|
|
_readSetOp(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(SetOp);
|
|
|
|
|
|
|
|
ReadCommonPlan(&local_node->plan);
|
|
|
|
|
|
|
|
READ_ENUM_FIELD(cmd, SetOpCmd);
|
|
|
|
READ_ENUM_FIELD(strategy, SetOpStrategy);
|
|
|
|
READ_INT_FIELD(numCols);
|
|
|
|
READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
|
|
|
|
READ_OID_ARRAY(dupOperators, local_node->numCols);
|
|
|
|
READ_INT_FIELD(flagColIdx);
|
|
|
|
READ_INT_FIELD(firstFlag);
|
|
|
|
READ_LONG_FIELD(numGroups);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _readLockRows
|
|
|
|
*/
|
|
|
|
static LockRows *
|
|
|
|
_readLockRows(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(LockRows);
|
|
|
|
|
|
|
|
ReadCommonPlan(&local_node->plan);
|
|
|
|
|
|
|
|
READ_NODE_FIELD(rowMarks);
|
|
|
|
READ_INT_FIELD(epqParam);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _readLimit
|
|
|
|
*/
|
|
|
|
static Limit *
|
|
|
|
_readLimit(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(Limit);
|
|
|
|
|
|
|
|
ReadCommonPlan(&local_node->plan);
|
|
|
|
|
|
|
|
READ_NODE_FIELD(limitOffset);
|
|
|
|
READ_NODE_FIELD(limitCount);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _readNestLoopParam
|
|
|
|
*/
|
|
|
|
static NestLoopParam *
|
|
|
|
_readNestLoopParam(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(NestLoopParam);
|
|
|
|
|
|
|
|
READ_INT_FIELD(paramno);
|
|
|
|
READ_NODE_FIELD(paramval);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _readPlanRowMark
|
|
|
|
*/
|
|
|
|
static PlanRowMark *
|
|
|
|
_readPlanRowMark(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(PlanRowMark);
|
|
|
|
|
|
|
|
READ_UINT_FIELD(rti);
|
|
|
|
READ_UINT_FIELD(prti);
|
|
|
|
READ_UINT_FIELD(rowmarkId);
|
|
|
|
READ_ENUM_FIELD(markType, RowMarkType);
|
|
|
|
READ_INT_FIELD(allMarkTypes);
|
|
|
|
READ_ENUM_FIELD(strength, LockClauseStrength);
|
|
|
|
READ_ENUM_FIELD(waitPolicy, LockWaitPolicy);
|
|
|
|
READ_BOOL_FIELD(isParent);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _readPlanInvalItem
|
|
|
|
*/
|
|
|
|
static PlanInvalItem *
|
|
|
|
_readPlanInvalItem(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(PlanInvalItem);
|
|
|
|
|
|
|
|
READ_INT_FIELD(cacheId);
|
|
|
|
READ_UINT_FIELD(hashValue);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _readSubPlan
|
|
|
|
*/
|
|
|
|
static SubPlan *
|
|
|
|
_readSubPlan(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(SubPlan);
|
|
|
|
|
|
|
|
READ_ENUM_FIELD(subLinkType, SubLinkType);
|
|
|
|
READ_NODE_FIELD(testexpr);
|
|
|
|
READ_NODE_FIELD(paramIds);
|
|
|
|
READ_INT_FIELD(plan_id);
|
|
|
|
READ_STRING_FIELD(plan_name);
|
|
|
|
READ_OID_FIELD(firstColType);
|
|
|
|
READ_INT_FIELD(firstColTypmod);
|
|
|
|
READ_OID_FIELD(firstColCollation);
|
|
|
|
READ_BOOL_FIELD(useHashTable);
|
|
|
|
READ_BOOL_FIELD(unknownEqFalse);
|
2017-02-15 00:09:47 +01:00
|
|
|
READ_BOOL_FIELD(parallel_safe);
|
2015-09-23 17:51:50 +02:00
|
|
|
READ_NODE_FIELD(setParam);
|
|
|
|
READ_NODE_FIELD(parParam);
|
|
|
|
READ_NODE_FIELD(args);
|
|
|
|
READ_FLOAT_FIELD(startup_cost);
|
|
|
|
READ_FLOAT_FIELD(per_call_cost);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _readAlternativeSubPlan
|
|
|
|
*/
|
|
|
|
static AlternativeSubPlan *
|
|
|
|
_readAlternativeSubPlan(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(AlternativeSubPlan);
|
|
|
|
|
|
|
|
READ_NODE_FIELD(subplans);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
Introduce extensible node types.
An extensible node is always tagged T_Extensible, but the extnodename
field identifies it more specifically; it may also include arbitrary
private data. Extensible nodes can be copied, tested for equality,
serialized, and deserialized, but the core system doesn't know
anything about them otherwise. Some extensions may find it useful to
include these nodes in fdw_private or custom_private lists in lieu of
arm-wrestling their data into a format that the core code can
understand.
Along the way, so as not to burden the authors of such extensible
node types too much, expose the functions for writing serialized
tokens, and for serializing and deserializing bitmapsets.
KaiGai Kohei, per a design suggested by me. Reviewed by Andres Freund
and by me, and further edited by me.
2016-02-12 15:31:16 +01:00
|
|
|
/*
|
|
|
|
* _readExtensibleNode
|
|
|
|
*/
|
|
|
|
static ExtensibleNode *
|
|
|
|
_readExtensibleNode(void)
|
|
|
|
{
|
|
|
|
const ExtensibleNodeMethods *methods;
|
|
|
|
ExtensibleNode *local_node;
|
2016-06-10 00:02:36 +02:00
|
|
|
const char *extnodename;
|
|
|
|
|
Introduce extensible node types.
An extensible node is always tagged T_Extensible, but the extnodename
field identifies it more specifically; it may also include arbitrary
private data. Extensible nodes can be copied, tested for equality,
serialized, and deserialized, but the core system doesn't know
anything about them otherwise. Some extensions may find it useful to
include these nodes in fdw_private or custom_private lists in lieu of
arm-wrestling their data into a format that the core code can
understand.
Along the way, so as not to burden the authors of such extensible
node types too much, expose the functions for writing serialized
tokens, and for serializing and deserializing bitmapsets.
KaiGai Kohei, per a design suggested by me. Reviewed by Andres Freund
and by me, and further edited by me.
2016-02-12 15:31:16 +01:00
|
|
|
READ_TEMP_LOCALS();
|
|
|
|
|
2016-07-07 22:13:37 +02:00
|
|
|
token = pg_strtok(&length); /* skip :extnodename */
|
2016-06-10 00:02:36 +02:00
|
|
|
token = pg_strtok(&length); /* get extnodename */
|
Introduce extensible node types.
An extensible node is always tagged T_Extensible, but the extnodename
field identifies it more specifically; it may also include arbitrary
private data. Extensible nodes can be copied, tested for equality,
serialized, and deserialized, but the core system doesn't know
anything about them otherwise. Some extensions may find it useful to
include these nodes in fdw_private or custom_private lists in lieu of
arm-wrestling their data into a format that the core code can
understand.
Along the way, so as not to burden the authors of such extensible
node types too much, expose the functions for writing serialized
tokens, and for serializing and deserializing bitmapsets.
KaiGai Kohei, per a design suggested by me. Reviewed by Andres Freund
and by me, and further edited by me.
2016-02-12 15:31:16 +01:00
|
|
|
|
|
|
|
extnodename = nullable_string(token, length);
|
|
|
|
if (!extnodename)
|
|
|
|
elog(ERROR, "extnodename has to be supplied");
|
|
|
|
methods = GetExtensibleNodeMethods(extnodename, false);
|
|
|
|
|
|
|
|
local_node = (ExtensibleNode *) newNode(methods->node_size,
|
|
|
|
T_ExtensibleNode);
|
|
|
|
local_node->extnodename = extnodename;
|
|
|
|
|
|
|
|
/* deserialize the private fields */
|
|
|
|
methods->nodeRead(local_node);
|
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
/*
|
|
|
|
* _readPartitionBoundSpec
|
|
|
|
*/
|
|
|
|
static PartitionBoundSpec *
|
|
|
|
_readPartitionBoundSpec(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(PartitionBoundSpec);
|
|
|
|
|
|
|
|
READ_CHAR_FIELD(strategy);
|
Allow a partitioned table to have a default partition.
Any tuples that don't route to any other partition will route to the
default partition.
Jeevan Ladhe, Beena Emerson, Ashutosh Bapat, Rahila Syed, and Robert
Haas, with review and testing at various stages by (at least) Rushabh
Lathia, Keith Fiske, Amit Langote, Amul Sul, Rajkumar Raghuanshi, Sven
Kunze, Kyotaro Horiguchi, Thom Brown, Rafia Sabih, and Dilip Kumar.
Discussion: http://postgr.es/m/CAH2L28tbN4SYyhS7YV1YBWcitkqbhSWfQCy0G=apRcC_PEO-bg@mail.gmail.com
Discussion: http://postgr.es/m/CAOG9ApEYj34fWMcvBMBQ-YtqR9fTdXhdN82QEKG0SVZ6zeL1xg@mail.gmail.com
2017-09-08 23:28:04 +02:00
|
|
|
READ_BOOL_FIELD(is_default);
|
Add hash partitioning.
Hash partitioning is useful when you want to partition a growing data
set evenly. This can be useful to keep table sizes reasonable, which
makes maintenance operations such as VACUUM faster, or to enable
partition-wise join.
At present, we still depend on constraint exclusion for partitioning
pruning, and the shape of the partition constraints for hash
partitioning is such that that doesn't work. Work is underway to fix
that, which should both improve performance and make partitioning
pruning work with hash partitioning.
Amul Sul, reviewed and tested by Dilip Kumar, Ashutosh Bapat, Yugo
Nagata, Rajkumar Raghuwanshi, Jesper Pedersen, and by me. A few
final tweaks also by me.
Discussion: http://postgr.es/m/CAAJ_b96fhpJAP=ALbETmeLk1Uni_GFZD938zgenhF49qgDTjaQ@mail.gmail.com
2017-11-10 00:07:25 +01:00
|
|
|
READ_INT_FIELD(modulus);
|
|
|
|
READ_INT_FIELD(remainder);
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
READ_NODE_FIELD(listdatums);
|
|
|
|
READ_NODE_FIELD(lowerdatums);
|
|
|
|
READ_NODE_FIELD(upperdatums);
|
2017-05-30 17:32:41 +02:00
|
|
|
READ_LOCATION_FIELD(location);
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _readPartitionRangeDatum
|
|
|
|
*/
|
|
|
|
static PartitionRangeDatum *
|
|
|
|
_readPartitionRangeDatum(void)
|
|
|
|
{
|
|
|
|
READ_LOCALS(PartitionRangeDatum);
|
|
|
|
|
Use MINVALUE/MAXVALUE instead of UNBOUNDED for range partition bounds.
Previously, UNBOUNDED meant no lower bound when used in the FROM list,
and no upper bound when used in the TO list, which was OK for
single-column range partitioning, but problematic with multiple
columns. For example, an upper bound of (10.0, UNBOUNDED) would not be
collocated with a lower bound of (10.0, UNBOUNDED), thus making it
difficult or impossible to define contiguous multi-column range
partitions in some cases.
Fix this by using MINVALUE and MAXVALUE instead of UNBOUNDED to
represent a partition column that is unbounded below or above
respectively. This syntax removes any ambiguity, and ensures that if
one partition's lower bound equals another partition's upper bound,
then the partitions are contiguous.
Also drop the constraint prohibiting finite values after an unbounded
column, and just document the fact that any values after MINVALUE or
MAXVALUE are ignored. Previously it was necessary to repeat UNBOUNDED
multiple times, which was needlessly verbose.
Note: Forces a post-PG 10 beta2 initdb.
Report by Amul Sul, original patch by Amit Langote with some
additional hacking by me.
Discussion: https://postgr.es/m/CAAJ_b947mowpLdxL3jo3YLKngRjrq9+Ej4ymduQTfYR+8=YAYQ@mail.gmail.com
2017-07-21 10:20:47 +02:00
|
|
|
READ_ENUM_FIELD(kind, PartitionRangeDatumKind);
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
READ_NODE_FIELD(value);
|
2017-05-30 17:32:41 +02:00
|
|
|
READ_LOCATION_FIELD(location);
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
|
|
|
|
READ_DONE();
|
|
|
|
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
|
|
|
|
* parseNodeString
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2002-11-25 19:12:12 +01:00
|
|
|
* Given a character string representing a node tree, parseNodeString creates
|
|
|
|
* the internal node structure.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2001-01-07 02:08:48 +01:00
|
|
|
* The string to be read must already have been loaded into pg_strtok().
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1998-02-26 05:46:47 +01:00
|
|
|
Node *
|
2002-11-25 19:12:12 +01:00
|
|
|
parseNodeString(void)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-11-25 19:12:12 +01:00
|
|
|
void *return_value;
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
READ_TEMP_LOCALS();
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2001-01-07 02:08:48 +01:00
|
|
|
token = pg_strtok(&length);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
#define MATCH(tokname, namelen) \
|
2010-12-22 04:11:40 +01:00
|
|
|
(length == namelen && memcmp(token, tokname, namelen) == 0)
|
2002-11-25 19:12:12 +01:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
if (MATCH("QUERY", 5))
|
1997-09-07 07:04:48 +02:00
|
|
|
return_value = _readQuery();
|
2013-07-18 23:10:16 +02:00
|
|
|
else if (MATCH("WITHCHECKOPTION", 15))
|
|
|
|
return_value = _readWithCheckOption();
|
2008-08-02 23:32:01 +02:00
|
|
|
else if (MATCH("SORTGROUPCLAUSE", 15))
|
|
|
|
return_value = _readSortGroupClause();
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
else if (MATCH("GROUPINGSET", 11))
|
|
|
|
return_value = _readGroupingSet();
|
2008-12-28 19:54:01 +01:00
|
|
|
else if (MATCH("WINDOWCLAUSE", 12))
|
|
|
|
return_value = _readWindowClause();
|
2006-04-30 20:30:40 +02:00
|
|
|
else if (MATCH("ROWMARKCLAUSE", 13))
|
|
|
|
return_value = _readRowMarkClause();
|
2008-10-04 23:56:55 +02:00
|
|
|
else if (MATCH("COMMONTABLEEXPR", 15))
|
|
|
|
return_value = _readCommonTableExpr();
|
2002-11-25 19:12:12 +01:00
|
|
|
else if (MATCH("SETOPERATIONSTMT", 16))
|
2000-10-05 21:11:39 +02:00
|
|
|
return_value = _readSetOperationStmt();
|
2002-12-12 16:49:42 +01:00
|
|
|
else if (MATCH("ALIAS", 5))
|
|
|
|
return_value = _readAlias();
|
|
|
|
else if (MATCH("RANGEVAR", 8))
|
|
|
|
return_value = _readRangeVar();
|
2007-02-20 18:32:18 +01:00
|
|
|
else if (MATCH("INTOCLAUSE", 10))
|
|
|
|
return_value = _readIntoClause();
|
2017-03-08 16:39:37 +01:00
|
|
|
else if (MATCH("TABLEFUNC", 9))
|
|
|
|
return_value = _readTableFunc();
|
2002-12-12 16:49:42 +01:00
|
|
|
else if (MATCH("VAR", 3))
|
|
|
|
return_value = _readVar();
|
|
|
|
else if (MATCH("CONST", 5))
|
|
|
|
return_value = _readConst();
|
|
|
|
else if (MATCH("PARAM", 5))
|
|
|
|
return_value = _readParam();
|
|
|
|
else if (MATCH("AGGREF", 6))
|
|
|
|
return_value = _readAggref();
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
else if (MATCH("GROUPINGFUNC", 12))
|
|
|
|
return_value = _readGroupingFunc();
|
2008-12-28 19:54:01 +01:00
|
|
|
else if (MATCH("WINDOWFUNC", 10))
|
|
|
|
return_value = _readWindowFunc();
|
2002-12-12 16:49:42 +01:00
|
|
|
else if (MATCH("ARRAYREF", 8))
|
|
|
|
return_value = _readArrayRef();
|
|
|
|
else if (MATCH("FUNCEXPR", 8))
|
|
|
|
return_value = _readFuncExpr();
|
2009-10-08 04:39:25 +02:00
|
|
|
else if (MATCH("NAMEDARGEXPR", 12))
|
|
|
|
return_value = _readNamedArgExpr();
|
2002-12-12 16:49:42 +01:00
|
|
|
else if (MATCH("OPEXPR", 6))
|
|
|
|
return_value = _readOpExpr();
|
|
|
|
else if (MATCH("DISTINCTEXPR", 12))
|
|
|
|
return_value = _readDistinctExpr();
|
2011-03-20 01:29:08 +01:00
|
|
|
else if (MATCH("NULLIFEXPR", 10))
|
|
|
|
return_value = _readNullIfExpr();
|
2003-06-29 02:33:44 +02:00
|
|
|
else if (MATCH("SCALARARRAYOPEXPR", 17))
|
|
|
|
return_value = _readScalarArrayOpExpr();
|
2002-12-12 16:49:42 +01:00
|
|
|
else if (MATCH("BOOLEXPR", 8))
|
|
|
|
return_value = _readBoolExpr();
|
|
|
|
else if (MATCH("SUBLINK", 7))
|
|
|
|
return_value = _readSubLink();
|
|
|
|
else if (MATCH("FIELDSELECT", 11))
|
|
|
|
return_value = _readFieldSelect();
|
2004-06-09 21:08:20 +02:00
|
|
|
else if (MATCH("FIELDSTORE", 10))
|
|
|
|
return_value = _readFieldStore();
|
2002-12-12 16:49:42 +01:00
|
|
|
else if (MATCH("RELABELTYPE", 11))
|
|
|
|
return_value = _readRelabelType();
|
2007-06-05 23:31:09 +02:00
|
|
|
else if (MATCH("COERCEVIAIO", 11))
|
|
|
|
return_value = _readCoerceViaIO();
|
2007-03-28 01:21:12 +02:00
|
|
|
else if (MATCH("ARRAYCOERCEEXPR", 15))
|
|
|
|
return_value = _readArrayCoerceExpr();
|
2004-12-12 00:26:51 +01:00
|
|
|
else if (MATCH("CONVERTROWTYPEEXPR", 18))
|
|
|
|
return_value = _readConvertRowtypeExpr();
|
2011-03-11 22:27:51 +01:00
|
|
|
else if (MATCH("COLLATE", 7))
|
|
|
|
return_value = _readCollateExpr();
|
2002-11-25 19:12:12 +01:00
|
|
|
else if (MATCH("CASE", 4))
|
1998-12-14 01:02:17 +01:00
|
|
|
return_value = _readCaseExpr();
|
2002-11-25 19:12:12 +01:00
|
|
|
else if (MATCH("WHEN", 4))
|
1998-12-14 01:02:17 +01:00
|
|
|
return_value = _readCaseWhen();
|
2004-03-17 21:48:43 +01:00
|
|
|
else if (MATCH("CASETESTEXPR", 12))
|
|
|
|
return_value = _readCaseTestExpr();
|
2003-04-09 01:20:04 +02:00
|
|
|
else if (MATCH("ARRAY", 5))
|
|
|
|
return_value = _readArrayExpr();
|
2004-05-11 00:44:49 +02:00
|
|
|
else if (MATCH("ROW", 3))
|
|
|
|
return_value = _readRowExpr();
|
2005-12-28 02:30:02 +01:00
|
|
|
else if (MATCH("ROWCOMPARE", 10))
|
|
|
|
return_value = _readRowCompareExpr();
|
2003-02-16 03:30:39 +01:00
|
|
|
else if (MATCH("COALESCE", 8))
|
|
|
|
return_value = _readCoalesceExpr();
|
2005-06-27 00:05:42 +02:00
|
|
|
else if (MATCH("MINMAX", 6))
|
|
|
|
return_value = _readMinMaxExpr();
|
2016-08-17 02:33:01 +02:00
|
|
|
else if (MATCH("SQLVALUEFUNCTION", 16))
|
|
|
|
return_value = _readSQLValueFunction();
|
2006-12-24 01:29:20 +01:00
|
|
|
else if (MATCH("XMLEXPR", 7))
|
|
|
|
return_value = _readXmlExpr();
|
2002-11-25 19:12:12 +01:00
|
|
|
else if (MATCH("NULLTEST", 8))
|
2001-06-20 00:39:12 +02:00
|
|
|
return_value = _readNullTest();
|
2002-11-25 19:12:12 +01:00
|
|
|
else if (MATCH("BOOLEANTEST", 11))
|
2001-06-20 00:39:12 +02:00
|
|
|
return_value = _readBooleanTest();
|
2003-02-03 22:15:45 +01:00
|
|
|
else if (MATCH("COERCETODOMAIN", 14))
|
|
|
|
return_value = _readCoerceToDomain();
|
|
|
|
else if (MATCH("COERCETODOMAINVALUE", 19))
|
|
|
|
return_value = _readCoerceToDomainValue();
|
2003-07-03 18:34:26 +02:00
|
|
|
else if (MATCH("SETTODEFAULT", 12))
|
|
|
|
return_value = _readSetToDefault();
|
2007-06-11 03:16:30 +02:00
|
|
|
else if (MATCH("CURRENTOFEXPR", 13))
|
|
|
|
return_value = _readCurrentOfExpr();
|
Code review for NextValueExpr expression node type.
Add missing infrastructure for this node type, notably in ruleutils.c where
its lack could demonstrably cause EXPLAIN to fail. Add outfuncs/readfuncs
support. (outfuncs support is useful today for debugging purposes. The
readfuncs support may never be needed, since at present it would only
matter for parallel query and NextValueExpr should never appear in a
parallelizable query; but it seems like a bad idea to have a primnode type
that isn't fully supported here.) Teach planner infrastructure that
NextValueExpr is a volatile, parallel-unsafe, non-leaky expression node
with cost cpu_operator_cost. Given its limited scope of usage, there
*might* be no live bug today from the lack of that knowledge, but it's
certainly going to bite us on the rear someday. Teach pg_stat_statements
about the new node type, too.
While at it, also teach cost_qual_eval() that MinMaxExpr, SQLValueFunction,
XmlExpr, and CoerceToDomain should be charged as cpu_operator_cost.
Failing to do this for SQLValueFunction was an oversight in my commit
0bb51aa96. The others are longer-standing oversights, but no time like the
present to fix them. (In principle, CoerceToDomain could have cost much
higher than this, but it doesn't presently seem worth trying to examine the
domain's constraints here.)
Modify execExprInterp.c to execute NextValueExpr as an out-of-line
function; it seems quite unlikely to me that it's worth insisting that
it be inlined in all expression eval methods. Besides, providing the
out-of-line function doesn't stop anyone from inlining if they want to.
Adjust some places where NextValueExpr support had been inserted with the
aid of a dartboard rather than keeping it in the same order as elsewhere.
Discussion: https://postgr.es/m/23862.1499981661@sss.pgh.pa.us
2017-07-14 21:25:43 +02:00
|
|
|
else if (MATCH("NEXTVALUEEXPR", 13))
|
|
|
|
return_value = _readNextValueExpr();
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using an
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
else if (MATCH("INFERENCEELEM", 13))
|
|
|
|
return_value = _readInferenceElem();
|
2002-12-12 16:49:42 +01:00
|
|
|
else if (MATCH("TARGETENTRY", 11))
|
|
|
|
return_value = _readTargetEntry();
|
|
|
|
else if (MATCH("RANGETBLREF", 11))
|
|
|
|
return_value = _readRangeTblRef();
|
|
|
|
else if (MATCH("JOINEXPR", 8))
|
|
|
|
return_value = _readJoinExpr();
|
|
|
|
else if (MATCH("FROMEXPR", 8))
|
|
|
|
return_value = _readFromExpr();
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using an
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
else if (MATCH("ONCONFLICTEXPR", 14))
|
|
|
|
return_value = _readOnConflictExpr();
|
Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning. The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.
At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.
This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.
Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
2018-04-06 21:23:04 +02:00
|
|
|
else if (MATCH("PARTITIONPRUNESTEPOP", 20))
|
|
|
|
return_value = _readPartitionPruneStepOp();
|
|
|
|
else if (MATCH("PARTITIONPRUNESTEPCOMBINE", 25))
|
|
|
|
return_value = _readPartitionPruneStepCombine();
|
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query
quals that appear in the parsed query. This is good but limiting, as
there can be parameters that appear later that can be usefully used to
further prune partitions.
This commit adds support for pruning subnodes of Append which cannot
possibly contain any matching tuples, during execution, by evaluating
Params to determine the minimum set of subnodes that can possibly match.
We support more than just simple Params in WHERE clauses. Support
additionally includes:
1. Parameterized Nested Loop Joins: The parameter from the outer side of the
join can be used to determine the minimum set of inner side partitions to
scan.
2. Initplans: Once an initplan has been executed we can then determine which
partitions match the value from the initplan.
Partition pruning is performed in two ways. When Params external to the plan
are found to match the partition key we attempt to prune away unneeded Append
subplans during the initialization of the executor. This allows us to bypass
the initialization of non-matching subplans meaning they won't appear in the
EXPLAIN or EXPLAIN ANALYZE output.
For parameters whose value is only known during the actual execution
then the pruning of these subplans must wait. Subplans which are
eliminated during this stage of pruning are still visible in the EXPLAIN
output. In order to determine if pruning has actually taken place, the
EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never
executed due to the elimination of the partition then the execution
timing area will state "(never executed)". Whereas, if, for example in
the case of parameterized nested loops, the number of loops stated in
the EXPLAIN ANALYZE output for certain subplans may appear lower than
others due to the subplan having been scanned fewer times. This is due
to the list of matching subnodes having to be evaluated whenever a
parameter which was found to match the partition key changes.
This commit required some additional infrastructure that permits the
building of a data structure which is able to perform the translation of
the matching partition IDs, as returned by get_matching_partitions, into
the list index of a subpaths list, as exist in node types such as
Append, MergeAppend and ModifyTable. This allows us to translate a list
of clauses into a Bitmapset of all the subpath indexes which must be
included to satisfy the clause list.
Author: David Rowley, based on an earlier effort by Beena Emerson
Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi,
Jesper Pedersen
Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
2018-04-07 22:54:31 +02:00
|
|
|
else if (MATCH("PARTITIONPRUNEINFO", 18))
|
|
|
|
return_value = _readPartitionPruneInfo();
|
2002-12-12 16:49:42 +01:00
|
|
|
else if (MATCH("RTE", 3))
|
|
|
|
return_value = _readRangeTblEntry();
|
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
2013-11-22 01:37:02 +01:00
|
|
|
else if (MATCH("RANGETBLFUNCTION", 16))
|
|
|
|
return_value = _readRangeTblFunction();
|
Redesign tablesample method API, and do extensive code review.
The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a TSM. (Raw inserts into system catalogs are not an acceptable
thing to do, because we can't undo them during DROP EXTENSION, nor will
pg_upgrade behave sanely.) Instead adopt an API more like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.
Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which is undesirable even if it weren't outright
unsafe), and discourage sampling methods from looking at invisible tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.
Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.
Back-patch to 9.5 so that we don't have to support the original API
in production.
2015-07-25 20:39:00 +02:00
|
|
|
else if (MATCH("TABLESAMPLECLAUSE", 17))
|
|
|
|
return_value = _readTableSampleClause();
|
2003-03-10 04:53:52 +01:00
|
|
|
else if (MATCH("NOTIFY", 6))
|
|
|
|
return_value = _readNotifyStmt();
|
2015-09-23 17:51:50 +02:00
|
|
|
else if (MATCH("DEFELEM", 7))
|
|
|
|
return_value = _readDefElem();
|
2003-03-10 04:53:52 +01:00
|
|
|
else if (MATCH("DECLARECURSOR", 13))
|
|
|
|
return_value = _readDeclareCursorStmt();
|
2015-09-23 17:51:50 +02:00
|
|
|
else if (MATCH("PLANNEDSTMT", 11))
|
|
|
|
return_value = _readPlannedStmt();
|
|
|
|
else if (MATCH("PLAN", 4))
|
|
|
|
return_value = _readPlan();
|
|
|
|
else if (MATCH("RESULT", 6))
|
|
|
|
return_value = _readResult();
|
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs) in the targetlist (like SELECT
generate_series(1,5)) so far was done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of return values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would allow to retain the old
"least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided to instead only return rows till
all SRFs are exhausted, returning NULL for already exhausted ones. We
deemed the previous behavior to be too confusing, unexpected and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set returning
arguments to a function, is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed,
returning multiple rows from the expression, even when the SRF containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of, now superfluous, code dealing with set return expressions
around. But as the changes to get rid of those are verbose largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
2017-01-18 21:46:50 +01:00
|
|
|
else if (MATCH("PROJECTSET", 10))
|
|
|
|
return_value = _readProjectSet();
|
2015-09-23 17:51:50 +02:00
|
|
|
else if (MATCH("MODIFYTABLE", 11))
|
|
|
|
return_value = _readModifyTable();
|
|
|
|
else if (MATCH("APPEND", 6))
|
|
|
|
return_value = _readAppend();
|
|
|
|
else if (MATCH("MERGEAPPEND", 11))
|
|
|
|
return_value = _readMergeAppend();
|
|
|
|
else if (MATCH("RECURSIVEUNION", 14))
|
|
|
|
return_value = _readRecursiveUnion();
|
|
|
|
else if (MATCH("BITMAPAND", 9))
|
|
|
|
return_value = _readBitmapAnd();
|
|
|
|
else if (MATCH("BITMAPOR", 8))
|
|
|
|
return_value = _readBitmapOr();
|
|
|
|
else if (MATCH("SCAN", 4))
|
|
|
|
return_value = _readScan();
|
|
|
|
else if (MATCH("SEQSCAN", 7))
|
|
|
|
return_value = _readSeqScan();
|
|
|
|
else if (MATCH("SAMPLESCAN", 10))
|
|
|
|
return_value = _readSampleScan();
|
|
|
|
else if (MATCH("INDEXSCAN", 9))
|
|
|
|
return_value = _readIndexScan();
|
|
|
|
else if (MATCH("INDEXONLYSCAN", 13))
|
|
|
|
return_value = _readIndexOnlyScan();
|
|
|
|
else if (MATCH("BITMAPINDEXSCAN", 15))
|
|
|
|
return_value = _readBitmapIndexScan();
|
|
|
|
else if (MATCH("BITMAPHEAPSCAN", 14))
|
|
|
|
return_value = _readBitmapHeapScan();
|
|
|
|
else if (MATCH("TIDSCAN", 7))
|
|
|
|
return_value = _readTidScan();
|
|
|
|
else if (MATCH("SUBQUERYSCAN", 12))
|
|
|
|
return_value = _readSubqueryScan();
|
|
|
|
else if (MATCH("FUNCTIONSCAN", 12))
|
|
|
|
return_value = _readFunctionScan();
|
|
|
|
else if (MATCH("VALUESSCAN", 10))
|
|
|
|
return_value = _readValuesScan();
|
2017-03-08 16:39:37 +01:00
|
|
|
else if (MATCH("TABLEFUNCSCAN", 13))
|
|
|
|
return_value = _readTableFuncScan();
|
2015-09-23 17:51:50 +02:00
|
|
|
else if (MATCH("CTESCAN", 7))
|
|
|
|
return_value = _readCteScan();
|
|
|
|
else if (MATCH("WORKTABLESCAN", 13))
|
|
|
|
return_value = _readWorkTableScan();
|
|
|
|
else if (MATCH("FOREIGNSCAN", 11))
|
|
|
|
return_value = _readForeignScan();
|
2015-11-12 13:40:31 +01:00
|
|
|
else if (MATCH("CUSTOMSCAN", 10))
|
|
|
|
return_value = _readCustomScan();
|
2015-09-23 17:51:50 +02:00
|
|
|
else if (MATCH("JOIN", 4))
|
|
|
|
return_value = _readJoin();
|
|
|
|
else if (MATCH("NESTLOOP", 8))
|
|
|
|
return_value = _readNestLoop();
|
|
|
|
else if (MATCH("MERGEJOIN", 9))
|
|
|
|
return_value = _readMergeJoin();
|
|
|
|
else if (MATCH("HASHJOIN", 8))
|
|
|
|
return_value = _readHashJoin();
|
|
|
|
else if (MATCH("MATERIAL", 8))
|
|
|
|
return_value = _readMaterial();
|
|
|
|
else if (MATCH("SORT", 4))
|
|
|
|
return_value = _readSort();
|
|
|
|
else if (MATCH("GROUP", 5))
|
|
|
|
return_value = _readGroup();
|
|
|
|
else if (MATCH("AGG", 3))
|
|
|
|
return_value = _readAgg();
|
|
|
|
else if (MATCH("WINDOWAGG", 9))
|
|
|
|
return_value = _readWindowAgg();
|
|
|
|
else if (MATCH("UNIQUE", 6))
|
|
|
|
return_value = _readUnique();
|
2015-10-01 15:15:36 +02:00
|
|
|
else if (MATCH("GATHER", 6))
|
|
|
|
return_value = _readGather();
|
2017-03-09 13:40:36 +01:00
|
|
|
else if (MATCH("GATHERMERGE", 11))
|
|
|
|
return_value = _readGatherMerge();
|
2015-09-23 17:51:50 +02:00
|
|
|
else if (MATCH("HASH", 4))
|
|
|
|
return_value = _readHash();
|
|
|
|
else if (MATCH("SETOP", 5))
|
|
|
|
return_value = _readSetOp();
|
|
|
|
else if (MATCH("LOCKROWS", 8))
|
|
|
|
return_value = _readLockRows();
|
|
|
|
else if (MATCH("LIMIT", 5))
|
|
|
|
return_value = _readLimit();
|
|
|
|
else if (MATCH("NESTLOOPPARAM", 13))
|
|
|
|
return_value = _readNestLoopParam();
|
|
|
|
else if (MATCH("PLANROWMARK", 11))
|
|
|
|
return_value = _readPlanRowMark();
|
|
|
|
else if (MATCH("PLANINVALITEM", 13))
|
|
|
|
return_value = _readPlanInvalItem();
|
|
|
|
else if (MATCH("SUBPLAN", 7))
|
|
|
|
return_value = _readSubPlan();
|
|
|
|
else if (MATCH("ALTERNATIVESUBPLAN", 18))
|
|
|
|
return_value = _readAlternativeSubPlan();
|
Introduce extensible node types.
An extensible node is always tagged T_Extensible, but the extnodename
field identifies it more specifically; it may also include arbitrary
private data. Extensible nodes can be copied, tested for equality,
serialized, and deserialized, but the core system doesn't know
anything about them otherwise. Some extensions may find it useful to
include these nodes in fdw_private or custom_private lists in lieu of
arm-wrestling their data into a format that the core code can
understand.
Along the way, so as not to burden the authors of such extensible
node types too much, expose the functions for writing serialized
tokens, and for serializing and deserializing bitmapsets.
KaiGai Kohei, per a design suggested by me. Reviewed by Andres Freund
and by me, and further edited by me.
2016-02-12 15:31:16 +01:00
|
|
|
else if (MATCH("EXTENSIBLENODE", 14))
|
|
|
|
return_value = _readExtensibleNode();
|
2017-05-30 17:32:41 +02:00
|
|
|
else if (MATCH("PARTITIONBOUNDSPEC", 18))
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
return_value = _readPartitionBoundSpec();
|
2017-05-30 17:32:41 +02:00
|
|
|
else if (MATCH("PARTITIONRANGEDATUM", 19))
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
return_value = _readPartitionRangeDatum();
|
1997-09-07 07:04:48 +02:00
|
|
|
else
|
2002-11-25 19:12:12 +01:00
|
|
|
{
|
|
|
|
elog(ERROR, "badly formatted node string \"%.32s\"...", token);
|
|
|
|
return_value = NULL; /* keep compiler quiet */
|
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1998-09-01 05:29:17 +02:00
|
|
|
return (Node *) return_value;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
|
|
|
|
* readDatum
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2001-01-07 02:08:48 +01:00
|
|
|
* Given a string representation of a constant, recreate the appropriate
|
|
|
|
* Datum. The string representation embeds length info, but not byValue,
|
|
|
|
* so we must be told that.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2016-04-08 23:26:36 +02:00
|
|
|
Datum
|
2001-01-07 02:08:48 +01:00
|
|
|
readDatum(bool typbyval)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-01-08 01:31:43 +01:00
|
|
|
Size length,
|
|
|
|
i;
|
1997-09-08 04:41:22 +02:00
|
|
|
int tokenLength;
|
|
|
|
char *token;
|
|
|
|
Datum res;
|
|
|
|
char *s;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* read the actual length of the value
|
|
|
|
*/
|
2001-01-07 02:08:48 +01:00
|
|
|
token = pg_strtok(&tokenLength);
|
2001-01-08 01:31:43 +01:00
|
|
|
length = atoui(token);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2001-07-03 18:52:48 +02:00
|
|
|
token = pg_strtok(&tokenLength); /* read the '[' */
|
|
|
|
if (token == NULL || token[0] != '[')
|
2014-01-23 23:18:23 +01:00
|
|
|
elog(ERROR, "expected \"[\" to start datum, but got \"%s\"; length = %zu",
|
|
|
|
token ? (const char *) token : "[NULL]", length);
|
2001-01-07 02:08:48 +01:00
|
|
|
|
|
|
|
if (typbyval)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2001-01-08 01:31:43 +01:00
|
|
|
if (length > (Size) sizeof(Datum))
|
2014-01-23 23:18:23 +01:00
|
|
|
elog(ERROR, "byval datum but length = %zu", length);
|
2000-01-14 01:53:21 +01:00
|
|
|
res = (Datum) 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
s = (char *) (&res);
|
2001-01-08 01:31:43 +01:00
|
|
|
for (i = 0; i < (Size) sizeof(Datum); i++)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2001-01-07 02:08:48 +01:00
|
|
|
token = pg_strtok(&tokenLength);
|
1997-09-07 07:04:48 +02:00
|
|
|
s[i] = (char) atoi(token);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (length <= 0)
|
2000-01-14 01:53:21 +01:00
|
|
|
res = (Datum) NULL;
|
|
|
|
else
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
|
|
|
s = (char *) palloc(length);
|
|
|
|
for (i = 0; i < length; i++)
|
|
|
|
{
|
2001-01-07 02:08:48 +01:00
|
|
|
token = pg_strtok(&tokenLength);
|
1997-09-07 07:04:48 +02:00
|
|
|
s[i] = (char) atoi(token);
|
|
|
|
}
|
|
|
|
res = PointerGetDatum(s);
|
|
|
|
}
|
|
|
|
|
2001-07-03 18:52:48 +02:00
|
|
|
token = pg_strtok(&tokenLength); /* read the ']' */
|
2001-01-07 02:08:48 +01:00
|
|
|
if (token == NULL || token[0] != ']')
|
2014-01-23 23:18:23 +01:00
|
|
|
elog(ERROR, "expected \"]\" to end datum, but got \"%s\"; length = %zu",
|
|
|
|
token ? (const char *) token : "[NULL]", length);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1998-09-01 05:29:17 +02:00
|
|
|
return res;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2015-09-23 17:51:50 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* readAttrNumberCols
|
|
|
|
*/
|
2016-04-08 23:26:36 +02:00
|
|
|
AttrNumber *
|
2015-09-23 17:51:50 +02:00
|
|
|
readAttrNumberCols(int numCols)
|
|
|
|
{
|
|
|
|
int tokenLength,
|
|
|
|
i;
|
|
|
|
char *token;
|
|
|
|
AttrNumber *attr_vals;
|
|
|
|
|
|
|
|
if (numCols <= 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
attr_vals = (AttrNumber *) palloc(numCols * sizeof(AttrNumber));
|
|
|
|
for (i = 0; i < numCols; i++)
|
|
|
|
{
|
|
|
|
token = pg_strtok(&tokenLength);
|
|
|
|
attr_vals[i] = atoi(token);
|
|
|
|
}
|
|
|
|
|
|
|
|
return attr_vals;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* readOidCols
|
|
|
|
*/
|
2016-04-08 23:26:36 +02:00
|
|
|
Oid *
|
2015-09-23 17:51:50 +02:00
|
|
|
readOidCols(int numCols)
|
|
|
|
{
|
|
|
|
int tokenLength,
|
|
|
|
i;
|
|
|
|
char *token;
|
|
|
|
Oid *oid_vals;
|
|
|
|
|
|
|
|
if (numCols <= 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
oid_vals = (Oid *) palloc(numCols * sizeof(Oid));
|
|
|
|
for (i = 0; i < numCols; i++)
|
|
|
|
{
|
|
|
|
token = pg_strtok(&tokenLength);
|
|
|
|
oid_vals[i] = atooid(token);
|
|
|
|
}
|
|
|
|
|
|
|
|
return oid_vals;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * readIntCols
 *
 * Consume numCols integer tokens from the input stream and return them
 * in a freshly palloc'd int array; returns NULL when numCols is zero or
 * negative.
 */
int *
readIntCols(int numCols)
{
	int		   *result;
	char	   *token;
	int			tokenLength;
	int			col;

	if (numCols <= 0)
		return NULL;

	result = (int *) palloc(numCols * sizeof(int));

	for (col = 0; col < numCols; col++)
	{
		token = pg_strtok(&tokenLength);
		result[col] = atoi(token);
	}

	return result;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* readBoolCols
|
|
|
|
*/
|
2016-04-08 23:26:36 +02:00
|
|
|
bool *
|
2015-09-23 17:51:50 +02:00
|
|
|
readBoolCols(int numCols)
|
|
|
|
{
|
|
|
|
int tokenLength,
|
|
|
|
i;
|
|
|
|
char *token;
|
|
|
|
bool *bool_vals;
|
|
|
|
|
|
|
|
if (numCols <= 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
bool_vals = (bool *) palloc(numCols * sizeof(bool));
|
|
|
|
for (i = 0; i < numCols; i++)
|
|
|
|
{
|
|
|
|
token = pg_strtok(&tokenLength);
|
|
|
|
bool_vals[i] = strtobool(token);
|
|
|
|
}
|
|
|
|
|
|
|
|
return bool_vals;
|
|
|
|
}
|