postgresql/src/backend/nodes/readfuncs.c

/*-------------------------------------------------------------------------
*
* readfuncs.c
* Reader functions for Postgres tree nodes.
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/nodes/readfuncs.c
*
* NOTES
* Path nodes do not have any readfuncs support, because we never
* have occasion to read them in. (There was once code here that
* claimed to read them, but it was broken as well as unused.) We
* never read executor state trees, either.
*
* Parse location fields are written out by outfuncs.c, but only for
* possible debugging use. When reading a location field, we discard
* the stored value and set the location field to -1 (ie, "unknown").
* This is because nodes coming from a stored rule should not be thought
* to have a known location in the current query's text.
*
*-------------------------------------------------------------------------
*/
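/*
 * For orientation: the input parsed here is the text representation
 * produced by outfuncs.c, namely a curly-braced node tag followed by
 * ":fieldname value" pairs.  A stored Query, much abbreviated and with
 * illustrative values, begins something like:
 *
 *	{QUERY :commandType 1 :querySource 0 :canSetTag true :utilityStmt <>
 *	 :resultRelation 0 :hasAggs false ...}
 *
 * read.c tokenizes that text with pg_strtok() and dispatches on the node
 * tag to the matching _readXxx routine below.
 */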
#include "postgres.h"
#include <math.h>
#include "fmgr.h"
#include "nodes/extensible.h"
#include "nodes/parsenodes.h"
#include "nodes/plannodes.h"
#include "nodes/readfuncs.h"
#include "utils/builtins.h"
/*
* Macros to simplify reading of different kinds of fields. Use these
* wherever possible to reduce the chance for silly typos. Note that these
* hard-wire conventions about the names of the local variables in a Read
* routine.
*/
/* Macros for declaring appropriate local variables */
/* A few guys need only local_node */
#define READ_LOCALS_NO_FIELDS(nodeTypeName) \
nodeTypeName *local_node = makeNode(nodeTypeName)
/* And a few guys need only the pg_strtok support fields */
#define READ_TEMP_LOCALS() \
char *token; \
int length
/* ... but most need both */
#define READ_LOCALS(nodeTypeName) \
READ_LOCALS_NO_FIELDS(nodeTypeName); \
READ_TEMP_LOCALS()
/* Read an integer field (anything written as ":fldname %d") */
#define READ_INT_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atoi(token)
/* Read an unsigned integer field (anything written as ":fldname %u") */
#define READ_UINT_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atoui(token)
/* Read an unsigned integer field (anything written using UINT64_FORMAT) */
#define READ_UINT64_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = pg_strtouint64(token, NULL, 10)
/* Read a long integer field (anything written as ":fldname %ld") */
#define READ_LONG_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atol(token)
/* Read an OID field (don't hard-wire assumption that OID is same as uint) */
#define READ_OID_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atooid(token)
/* Read a char field (ie, one ascii character) */
#define READ_CHAR_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
/* avoid overhead of calling debackslash() for one char */ \
local_node->fldname = (length == 0) ? '\0' : (token[0] == '\\' ? token[1] : token[0])
/* Read an enumerated-type field that was written as an integer code */
#define READ_ENUM_FIELD(fldname, enumtype) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = (enumtype) atoi(token)
/* Read a float field */
#define READ_FLOAT_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atof(token)
/* Read a boolean field */
#define READ_BOOL_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = strtobool(token)
/* Read a character-string field */
#define READ_STRING_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = nullable_string(token, length)
/* Read a parse location field (and throw away the value, per notes above) */
#define READ_LOCATION_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
(void) token; /* in case not used elsewhere */ \
local_node->fldname = -1 /* set field to "unknown" */
/* Read a Node field */
#define READ_NODE_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
(void) token; /* in case not used elsewhere */ \
local_node->fldname = nodeRead(NULL, 0)
/* Read a bitmapset field */
#define READ_BITMAPSET_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
(void) token; /* in case not used elsewhere */ \
local_node->fldname = _readBitmapset()
/* Read an attribute number array */
#define READ_ATTRNUMBER_ARRAY(fldname, len) \
token = pg_strtok(&length); /* skip :fldname */ \
local_node->fldname = readAttrNumberCols(len);
/* Read an oid array */
#define READ_OID_ARRAY(fldname, len) \
token = pg_strtok(&length); /* skip :fldname */ \
local_node->fldname = readOidCols(len);
/* Read an int array */
#define READ_INT_ARRAY(fldname, len) \
token = pg_strtok(&length); /* skip :fldname */ \
local_node->fldname = readIntCols(len);
/* Read a bool array */
#define READ_BOOL_ARRAY(fldname, len) \
token = pg_strtok(&length); /* skip :fldname */ \
local_node->fldname = readBoolCols(len);
/* Routine exit */
#define READ_DONE() \
return local_node
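/*
 * Putting these together: a typical read routine is just the appropriate
 * field macros, in exactly the order outfuncs.c wrote the fields,
 * bracketed by READ_LOCALS and READ_DONE.  A minimal sketch for a
 * hypothetical node type (ExampleNode and its fields are invented here
 * purely for illustration):
 *
 *	static ExampleNode *
 *	_readExampleNode(void)
 *	{
 *		READ_LOCALS(ExampleNode);
 *
 *		READ_INT_FIELD(examplecount);
 *		READ_NODE_FIELD(examplechild);
 *
 *		READ_DONE();
 *	}
 *
 * READ_LOCALS declares the local_node, token, and length variables that
 * the field macros hard-wire; each field macro consumes one ":fldname
 * value" pair, so the ordering matters because pg_strtok() simply walks
 * the token stream.
 */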
/*
* NOTE: use atoi() to read values written with %d, or atoui() to read
* values written with %u in outfuncs.c. An exception is OID values,
* for which use atooid(). (As of 7.1, outfuncs.c writes OIDs as %u,
* but this will probably change in the future.)
*/
#define atoui(x) ((unsigned int) strtoul((x), NULL, 10))
#define strtobool(x) ((*(x) == 't') ? true : false)
#define nullable_string(token,length) \
((length) == 0 ? NULL : debackslash(token, length))
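/*
 * Note on nullable_string: outfuncs.c writes a NULL string as the special
 * token "<>", which pg_strtok() reports with length 0.  So, for example,
 * a field written as ":relname foo" reads back as "foo" (after
 * debackslash processing), while ":relname <>" reads back as NULL.
 */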
/*
* _readBitmapset
*/
static Bitmapset *
_readBitmapset(void)
{
Bitmapset *result = NULL;
READ_TEMP_LOCALS();
token = pg_strtok(&length);
if (token == NULL)
elog(ERROR, "incomplete Bitmapset structure");
if (length != 1 || token[0] != '(')
elog(ERROR, "unrecognized token: \"%.*s\"", length, token);
token = pg_strtok(&length);
if (token == NULL)
elog(ERROR, "incomplete Bitmapset structure");
if (length != 1 || token[0] != 'b')
elog(ERROR, "unrecognized token: \"%.*s\"", length, token);
for (;;)
{
int val;
char *endptr;
token = pg_strtok(&length);
if (token == NULL)
elog(ERROR, "unterminated Bitmapset structure");
if (length == 1 && token[0] == ')')
break;
val = (int) strtol(token, &endptr, 10);
if (endptr != token + length)
elog(ERROR, "unrecognized integer: \"%.*s\"", length, token);
result = bms_add_member(result, val);
}
return result;
}
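/*
 * For reference, outfuncs.c serializes a Bitmapset as a parenthesized,
 * space-separated member list tagged with "b": a set containing members
 * 3, 5, and 11 is written as "(b 3 5 11)", and an empty or NULL set as
 * "(b)".
 */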
/*
* for use by extensions which define extensible nodes
*/
Bitmapset *
readBitmapset(void)
{
return _readBitmapset();
}
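/*
 * A sketch of how an extension's nodeRead callback (declared via
 * nodes/extensible.h) might use this function.  MyPlanData and its
 * relids field are hypothetical; the pg_strtok() call consumes the
 * ":relids" field label before readBitmapset() parses the value:
 *
 *	static void
 *	myplan_node_read(struct ExtensibleNode *node)
 *	{
 *		MyPlanData *local_node = (MyPlanData *) node;
 *		char	   *token;
 *		int			length;
 *
 *		token = pg_strtok(&length);
 *		(void) token;
 *		local_node->relids = readBitmapset();
 *	}
 */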
/*
* _readQuery
*/
static Query *
_readQuery(void)
{
READ_LOCALS(Query);
READ_ENUM_FIELD(commandType, CmdType);
READ_ENUM_FIELD(querySource, QuerySource);
local_node->queryId = UINT64CONST(0); /* not saved in output format */
READ_BOOL_FIELD(canSetTag);
READ_NODE_FIELD(utilityStmt);
READ_INT_FIELD(resultRelation);
READ_BOOL_FIELD(hasAggs);
READ_BOOL_FIELD(hasWindowFuncs);
READ_BOOL_FIELD(hasTargetSRFs);
READ_BOOL_FIELD(hasSubLinks);
READ_BOOL_FIELD(hasDistinctOn);
READ_BOOL_FIELD(hasRecursive);
READ_BOOL_FIELD(hasModifyingCTE);
READ_BOOL_FIELD(hasForUpdate);
READ_BOOL_FIELD(hasRowSecurity);
READ_NODE_FIELD(cteList);
READ_NODE_FIELD(rtable);
READ_NODE_FIELD(jointree);
READ_NODE_FIELD(targetList);
READ_ENUM_FIELD(override, OverridingKind);
READ_NODE_FIELD(onConflict);
READ_NODE_FIELD(returningList);
READ_NODE_FIELD(groupClause);
READ_NODE_FIELD(groupingSets);
READ_NODE_FIELD(havingQual);
READ_NODE_FIELD(windowClause);
READ_NODE_FIELD(distinctClause);
READ_NODE_FIELD(sortClause);
READ_NODE_FIELD(limitOffset);
READ_NODE_FIELD(limitCount);
READ_NODE_FIELD(rowMarks);
READ_NODE_FIELD(setOperations);
READ_NODE_FIELD(constraintDeps);
/* withCheckOptions intentionally omitted, see comment in parsenodes.h */
READ_LOCATION_FIELD(stmt_location);
READ_LOCATION_FIELD(stmt_len);
READ_DONE();
}
/*
* _readNotifyStmt
*/
static NotifyStmt *
_readNotifyStmt(void)
{
READ_LOCALS(NotifyStmt);
READ_STRING_FIELD(conditionname);
READ_STRING_FIELD(payload);
READ_DONE();
}
/*
* _readDeclareCursorStmt
*/
static DeclareCursorStmt *
_readDeclareCursorStmt(void)
{
READ_LOCALS(DeclareCursorStmt);
READ_STRING_FIELD(portalname);
READ_INT_FIELD(options);
READ_NODE_FIELD(query);
READ_DONE();
}
/*
* _readWithCheckOption
*/
static WithCheckOption *
_readWithCheckOption(void)
{
READ_LOCALS(WithCheckOption);
READ_ENUM_FIELD(kind, WCOKind);
READ_STRING_FIELD(relname);
READ_STRING_FIELD(polname);
READ_NODE_FIELD(qual);
READ_BOOL_FIELD(cascaded);
READ_DONE();
}
/*
* _readSortGroupClause
*/
static SortGroupClause *
_readSortGroupClause(void)
{
READ_LOCALS(SortGroupClause);
READ_UINT_FIELD(tleSortGroupRef);
READ_OID_FIELD(eqop);
READ_OID_FIELD(sortop);
READ_BOOL_FIELD(nulls_first);
READ_BOOL_FIELD(hashable);
READ_DONE();
}
/*
* _readGroupingSet
*/
static GroupingSet *
_readGroupingSet(void)
{
READ_LOCALS(GroupingSet);
READ_ENUM_FIELD(kind, GroupingSetKind);
READ_NODE_FIELD(content);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readWindowClause
*/
static WindowClause *
_readWindowClause(void)
{
READ_LOCALS(WindowClause);
READ_STRING_FIELD(name);
READ_STRING_FIELD(refname);
READ_NODE_FIELD(partitionClause);
READ_NODE_FIELD(orderClause);
READ_INT_FIELD(frameOptions);
READ_NODE_FIELD(startOffset);
READ_NODE_FIELD(endOffset);
READ_OID_FIELD(startInRangeFunc);
READ_OID_FIELD(endInRangeFunc);
READ_OID_FIELD(inRangeColl);
READ_BOOL_FIELD(inRangeAsc);
READ_BOOL_FIELD(inRangeNullsFirst);
READ_UINT_FIELD(winref);
READ_BOOL_FIELD(copiedOrder);
READ_DONE();
}
/*
* _readRowMarkClause
*/
static RowMarkClause *
_readRowMarkClause(void)
{
READ_LOCALS(RowMarkClause);
READ_UINT_FIELD(rti);
READ_ENUM_FIELD(strength, LockClauseStrength);
READ_ENUM_FIELD(waitPolicy, LockWaitPolicy);
READ_BOOL_FIELD(pushedDown);
READ_DONE();
}
/*
* _readCommonTableExpr
*/
static CommonTableExpr *
_readCommonTableExpr(void)
{
READ_LOCALS(CommonTableExpr);
READ_STRING_FIELD(ctename);
READ_NODE_FIELD(aliascolnames);
READ_NODE_FIELD(ctequery);
READ_LOCATION_FIELD(location);
READ_BOOL_FIELD(cterecursive);
READ_INT_FIELD(cterefcount);
READ_NODE_FIELD(ctecolnames);
READ_NODE_FIELD(ctecoltypes);
READ_NODE_FIELD(ctecoltypmods);
READ_NODE_FIELD(ctecolcollations);
READ_DONE();
}
/*
* _readSetOperationStmt
*/
static SetOperationStmt *
_readSetOperationStmt(void)
{
READ_LOCALS(SetOperationStmt);
READ_ENUM_FIELD(op, SetOperation);
READ_BOOL_FIELD(all);
READ_NODE_FIELD(larg);
READ_NODE_FIELD(rarg);
READ_NODE_FIELD(colTypes);
READ_NODE_FIELD(colTypmods);
READ_NODE_FIELD(colCollations);
READ_NODE_FIELD(groupClauses);
READ_DONE();
}
/*
* Stuff from primnodes.h.
*/
static Alias *
_readAlias(void)
{
READ_LOCALS(Alias);
READ_STRING_FIELD(aliasname);
READ_NODE_FIELD(colnames);
READ_DONE();
}
static RangeVar *
_readRangeVar(void)
{
READ_LOCALS(RangeVar);
local_node->catalogname = NULL; /* not currently saved in output format */
READ_STRING_FIELD(schemaname);
READ_STRING_FIELD(relname);
READ_BOOL_FIELD(inh);
READ_CHAR_FIELD(relpersistence);
READ_NODE_FIELD(alias);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readTableFunc
*/
static TableFunc *
_readTableFunc(void)
{
READ_LOCALS(TableFunc);
READ_NODE_FIELD(ns_uris);
READ_NODE_FIELD(ns_names);
READ_NODE_FIELD(docexpr);
READ_NODE_FIELD(rowexpr);
READ_NODE_FIELD(colnames);
READ_NODE_FIELD(coltypes);
READ_NODE_FIELD(coltypmods);
READ_NODE_FIELD(colcollations);
READ_NODE_FIELD(colexprs);
READ_NODE_FIELD(coldefexprs);
READ_BITMAPSET_FIELD(notnulls);
READ_INT_FIELD(ordinalitycol);
READ_LOCATION_FIELD(location);
READ_DONE();
}
static IntoClause *
_readIntoClause(void)
{
READ_LOCALS(IntoClause);
READ_NODE_FIELD(rel);
READ_NODE_FIELD(colNames);
READ_NODE_FIELD(options);
READ_ENUM_FIELD(onCommit, OnCommitAction);
READ_STRING_FIELD(tableSpaceName);
READ_NODE_FIELD(viewQuery);
READ_BOOL_FIELD(skipData);
READ_DONE();
}
/*
* _readVar
*/
static Var *
_readVar(void)
{
READ_LOCALS(Var);
READ_UINT_FIELD(varno);
READ_INT_FIELD(varattno);
READ_OID_FIELD(vartype);
READ_INT_FIELD(vartypmod);
READ_OID_FIELD(varcollid);
READ_UINT_FIELD(varlevelsup);
READ_UINT_FIELD(varnoold);
READ_INT_FIELD(varoattno);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readConst
*/
static Const *
_readConst(void)
{
READ_LOCALS(Const);
READ_OID_FIELD(consttype);
READ_INT_FIELD(consttypmod);
READ_OID_FIELD(constcollid);
READ_INT_FIELD(constlen);
READ_BOOL_FIELD(constbyval);
READ_BOOL_FIELD(constisnull);
READ_LOCATION_FIELD(location);
token = pg_strtok(&length); /* skip :constvalue */
if (local_node->constisnull)
token = pg_strtok(&length); /* skip "<>" */
else
local_node->constvalue = readDatum(local_node->constbyval);
READ_DONE();
}
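/*
 * For reference, outfuncs.c writes :constvalue either as "<>" for a NULL
 * constant or via outDatum(), which for a pass-by-value type emits the
 * value's byte length followed by the raw bytes of the Datum in brackets.
 * On a 64-bit little-endian build, an int4 constant 42 would look
 * something like:
 *
 *	:constvalue 4 [ 42 0 0 0 0 0 0 0 ]
 *
 * readDatum() reverses that encoding.
 */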
/*
* _readParam
*/
static Param *
_readParam(void)
{
READ_LOCALS(Param);
READ_ENUM_FIELD(paramkind, ParamKind);
READ_INT_FIELD(paramid);
READ_OID_FIELD(paramtype);
READ_INT_FIELD(paramtypmod);
READ_OID_FIELD(paramcollid);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readAggref
*/
static Aggref *
_readAggref(void)
{
READ_LOCALS(Aggref);
READ_OID_FIELD(aggfnoid);
READ_OID_FIELD(aggtype);
READ_OID_FIELD(aggcollid);
READ_OID_FIELD(inputcollid);
READ_OID_FIELD(aggtranstype);
READ_NODE_FIELD(aggargtypes);
READ_NODE_FIELD(aggdirectargs);
READ_NODE_FIELD(args);
READ_NODE_FIELD(aggorder);
READ_NODE_FIELD(aggdistinct);
READ_NODE_FIELD(aggfilter);
READ_BOOL_FIELD(aggstar);
READ_BOOL_FIELD(aggvariadic);
READ_CHAR_FIELD(aggkind);
READ_UINT_FIELD(agglevelsup);
READ_ENUM_FIELD(aggsplit, AggSplit);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readGroupingFunc
*/
static GroupingFunc *
_readGroupingFunc(void)
{
READ_LOCALS(GroupingFunc);
READ_NODE_FIELD(args);
READ_NODE_FIELD(refs);
READ_NODE_FIELD(cols);
READ_UINT_FIELD(agglevelsup);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readWindowFunc
*/
static WindowFunc *
_readWindowFunc(void)
{
READ_LOCALS(WindowFunc);
READ_OID_FIELD(winfnoid);
READ_OID_FIELD(wintype);
READ_OID_FIELD(wincollid);
READ_OID_FIELD(inputcollid);
READ_NODE_FIELD(args);
READ_NODE_FIELD(aggfilter);
READ_UINT_FIELD(winref);
READ_BOOL_FIELD(winstar);
READ_BOOL_FIELD(winagg);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readArrayRef
*/
static ArrayRef *
_readArrayRef(void)
{
READ_LOCALS(ArrayRef);
READ_OID_FIELD(refarraytype);
READ_OID_FIELD(refelemtype);
READ_INT_FIELD(reftypmod);
READ_OID_FIELD(refcollid);
READ_NODE_FIELD(refupperindexpr);
READ_NODE_FIELD(reflowerindexpr);
READ_NODE_FIELD(refexpr);
READ_NODE_FIELD(refassgnexpr);
READ_DONE();
}
/*
* _readFuncExpr
*/
static FuncExpr *
_readFuncExpr(void)
{
READ_LOCALS(FuncExpr);
READ_OID_FIELD(funcid);
READ_OID_FIELD(funcresulttype);
READ_BOOL_FIELD(funcretset);
READ_BOOL_FIELD(funcvariadic);
READ_ENUM_FIELD(funcformat, CoercionForm);
READ_OID_FIELD(funccollid);
READ_OID_FIELD(inputcollid);
READ_NODE_FIELD(args);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readNamedArgExpr
*/
static NamedArgExpr *
_readNamedArgExpr(void)
{
READ_LOCALS(NamedArgExpr);
READ_NODE_FIELD(arg);
READ_STRING_FIELD(name);
READ_INT_FIELD(argnumber);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readOpExpr
*/
static OpExpr *
_readOpExpr(void)
{
READ_LOCALS(OpExpr);
READ_OID_FIELD(opno);
READ_OID_FIELD(opfuncid);
READ_OID_FIELD(opresulttype);
READ_BOOL_FIELD(opretset);
READ_OID_FIELD(opcollid);
READ_OID_FIELD(inputcollid);
READ_NODE_FIELD(args);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readDistinctExpr
*/
static DistinctExpr *
_readDistinctExpr(void)
{
READ_LOCALS(DistinctExpr);
READ_OID_FIELD(opno);
READ_OID_FIELD(opfuncid);
READ_OID_FIELD(opresulttype);
READ_BOOL_FIELD(opretset);
READ_OID_FIELD(opcollid);
READ_OID_FIELD(inputcollid);
READ_NODE_FIELD(args);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readNullIfExpr
*/
static NullIfExpr *
_readNullIfExpr(void)
{
READ_LOCALS(NullIfExpr);
READ_OID_FIELD(opno);
READ_OID_FIELD(opfuncid);
READ_OID_FIELD(opresulttype);
READ_BOOL_FIELD(opretset);
READ_OID_FIELD(opcollid);
READ_OID_FIELD(inputcollid);
READ_NODE_FIELD(args);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readScalarArrayOpExpr
*/
static ScalarArrayOpExpr *
_readScalarArrayOpExpr(void)
{
READ_LOCALS(ScalarArrayOpExpr);
READ_OID_FIELD(opno);
READ_OID_FIELD(opfuncid);
READ_BOOL_FIELD(useOr);
READ_OID_FIELD(inputcollid);
READ_NODE_FIELD(args);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readBoolExpr
*/
static BoolExpr *
_readBoolExpr(void)
{
READ_LOCALS(BoolExpr);
/* do-it-yourself enum representation */
token = pg_strtok(&length); /* skip :boolop */
token = pg_strtok(&length); /* get field value */
if (strncmp(token, "and", 3) == 0)
local_node->boolop = AND_EXPR;
else if (strncmp(token, "or", 2) == 0)
local_node->boolop = OR_EXPR;
else if (strncmp(token, "not", 3) == 0)
local_node->boolop = NOT_EXPR;
else
elog(ERROR, "unrecognized boolop \"%.*s\"", length, token);
READ_NODE_FIELD(args);
READ_LOCATION_FIELD(location);
READ_DONE();
}
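/*
 * For reference, outfuncs.c writes boolop as one of the literal words
 * "and", "or", or "not" (e.g. ":boolop and"), which is why this routine
 * matches the token text itself rather than using READ_ENUM_FIELD.
 */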
/*
* _readSubLink
*/
static SubLink *
_readSubLink(void)
{
READ_LOCALS(SubLink);
READ_ENUM_FIELD(subLinkType, SubLinkType);
READ_INT_FIELD(subLinkId);
READ_NODE_FIELD(testexpr);
READ_NODE_FIELD(operName);
READ_NODE_FIELD(subselect);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readSubPlan is not needed since it doesn't appear in stored rules.
*/
/*
* _readFieldSelect
*/
static FieldSelect *
_readFieldSelect(void)
{
READ_LOCALS(FieldSelect);
READ_NODE_FIELD(arg);
READ_INT_FIELD(fieldnum);
READ_OID_FIELD(resulttype);
READ_INT_FIELD(resulttypmod);
READ_OID_FIELD(resultcollid);
READ_DONE();
}
/*
* _readFieldStore
*/
static FieldStore *
_readFieldStore(void)
{
READ_LOCALS(FieldStore);
READ_NODE_FIELD(arg);
READ_NODE_FIELD(newvals);
READ_NODE_FIELD(fieldnums);
READ_OID_FIELD(resulttype);
READ_DONE();
}
/*
* _readRelabelType
*/
static RelabelType *
_readRelabelType(void)
{
READ_LOCALS(RelabelType);
READ_NODE_FIELD(arg);
READ_OID_FIELD(resulttype);
READ_INT_FIELD(resulttypmod);
READ_OID_FIELD(resultcollid);
READ_ENUM_FIELD(relabelformat, CoercionForm);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readCoerceViaIO
*/
static CoerceViaIO *
_readCoerceViaIO(void)
{
READ_LOCALS(CoerceViaIO);
READ_NODE_FIELD(arg);
READ_OID_FIELD(resulttype);
READ_OID_FIELD(resultcollid);
READ_ENUM_FIELD(coerceformat, CoercionForm);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readArrayCoerceExpr
*/
static ArrayCoerceExpr *
_readArrayCoerceExpr(void)
{
READ_LOCALS(ArrayCoerceExpr);
READ_NODE_FIELD(arg);
READ_NODE_FIELD(elemexpr);
READ_OID_FIELD(resulttype);
READ_INT_FIELD(resulttypmod);
READ_OID_FIELD(resultcollid);
READ_ENUM_FIELD(coerceformat, CoercionForm);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readConvertRowtypeExpr
*/
static ConvertRowtypeExpr *
_readConvertRowtypeExpr(void)
{
READ_LOCALS(ConvertRowtypeExpr);
READ_NODE_FIELD(arg);
READ_OID_FIELD(resulttype);
READ_ENUM_FIELD(convertformat, CoercionForm);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readCollateExpr
*/
static CollateExpr *
_readCollateExpr(void)
{
READ_LOCALS(CollateExpr);
READ_NODE_FIELD(arg);
READ_OID_FIELD(collOid);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readCaseExpr
*/
static CaseExpr *
_readCaseExpr(void)
{
READ_LOCALS(CaseExpr);
READ_OID_FIELD(casetype);
READ_OID_FIELD(casecollid);
READ_NODE_FIELD(arg);
READ_NODE_FIELD(args);
READ_NODE_FIELD(defresult);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readCaseWhen
*/
static CaseWhen *
_readCaseWhen(void)
{
READ_LOCALS(CaseWhen);
READ_NODE_FIELD(expr);
READ_NODE_FIELD(result);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readCaseTestExpr
*/
static CaseTestExpr *
_readCaseTestExpr(void)
{
READ_LOCALS(CaseTestExpr);
READ_OID_FIELD(typeId);
READ_INT_FIELD(typeMod);
READ_OID_FIELD(collation);
READ_DONE();
}
/*
* _readArrayExpr
*/
static ArrayExpr *
_readArrayExpr(void)
{
READ_LOCALS(ArrayExpr);
READ_OID_FIELD(array_typeid);
READ_OID_FIELD(array_collid);
READ_OID_FIELD(element_typeid);
READ_NODE_FIELD(elements);
READ_BOOL_FIELD(multidims);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readRowExpr
*/
static RowExpr *
_readRowExpr(void)
{
READ_LOCALS(RowExpr);
READ_NODE_FIELD(args);
READ_OID_FIELD(row_typeid);
READ_ENUM_FIELD(row_format, CoercionForm);
READ_NODE_FIELD(colnames);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readRowCompareExpr
*/
static RowCompareExpr *
_readRowCompareExpr(void)
{
READ_LOCALS(RowCompareExpr);
READ_ENUM_FIELD(rctype, RowCompareType);
READ_NODE_FIELD(opnos);
READ_NODE_FIELD(opfamilies);
READ_NODE_FIELD(inputcollids);
READ_NODE_FIELD(largs);
READ_NODE_FIELD(rargs);
READ_DONE();
}
/*
* _readCoalesceExpr
*/
static CoalesceExpr *
_readCoalesceExpr(void)
{
READ_LOCALS(CoalesceExpr);
READ_OID_FIELD(coalescetype);
READ_OID_FIELD(coalescecollid);
READ_NODE_FIELD(args);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readMinMaxExpr
*/
static MinMaxExpr *
_readMinMaxExpr(void)
{
READ_LOCALS(MinMaxExpr);
READ_OID_FIELD(minmaxtype);
READ_OID_FIELD(minmaxcollid);
READ_OID_FIELD(inputcollid);
READ_ENUM_FIELD(op, MinMaxOp);
READ_NODE_FIELD(args);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readSQLValueFunction
*/
static SQLValueFunction *
_readSQLValueFunction(void)
{
READ_LOCALS(SQLValueFunction);
READ_ENUM_FIELD(op, SQLValueFunctionOp);
READ_OID_FIELD(type);
READ_INT_FIELD(typmod);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readXmlExpr
*/
static XmlExpr *
_readXmlExpr(void)
{
READ_LOCALS(XmlExpr);
READ_ENUM_FIELD(op, XmlExprOp);
READ_STRING_FIELD(name);
READ_NODE_FIELD(named_args);
READ_NODE_FIELD(arg_names);
READ_NODE_FIELD(args);
READ_ENUM_FIELD(xmloption, XmlOptionType);
READ_OID_FIELD(type);
READ_INT_FIELD(typmod);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readNullTest
*/
static NullTest *
_readNullTest(void)
{
READ_LOCALS(NullTest);
READ_NODE_FIELD(arg);
READ_ENUM_FIELD(nulltesttype, NullTestType);
READ_BOOL_FIELD(argisrow);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readBooleanTest
*/
static BooleanTest *
_readBooleanTest(void)
{
READ_LOCALS(BooleanTest);
READ_NODE_FIELD(arg);
READ_ENUM_FIELD(booltesttype, BoolTestType);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readCoerceToDomain
*/
static CoerceToDomain *
_readCoerceToDomain(void)
{
READ_LOCALS(CoerceToDomain);
READ_NODE_FIELD(arg);
READ_OID_FIELD(resulttype);
READ_INT_FIELD(resulttypmod);
READ_OID_FIELD(resultcollid);
READ_ENUM_FIELD(coercionformat, CoercionForm);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readCoerceToDomainValue
*/
static CoerceToDomainValue *
_readCoerceToDomainValue(void)
{
READ_LOCALS(CoerceToDomainValue);
READ_OID_FIELD(typeId);
READ_INT_FIELD(typeMod);
READ_OID_FIELD(collation);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readSetToDefault
*/
static SetToDefault *
_readSetToDefault(void)
{
READ_LOCALS(SetToDefault);
READ_OID_FIELD(typeId);
READ_INT_FIELD(typeMod);
READ_OID_FIELD(collation);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readCurrentOfExpr
*/
static CurrentOfExpr *
_readCurrentOfExpr(void)
{
READ_LOCALS(CurrentOfExpr);
READ_UINT_FIELD(cvarno);
READ_STRING_FIELD(cursor_name);
READ_INT_FIELD(cursor_param);
READ_DONE();
}
/*
* _readNextValueExpr
*/
static NextValueExpr *
_readNextValueExpr(void)
{
READ_LOCALS(NextValueExpr);
READ_OID_FIELD(seqid);
READ_OID_FIELD(typeId);
READ_DONE();
}
/*
* _readInferenceElem
*/
static InferenceElem *
_readInferenceElem(void)
{
READ_LOCALS(InferenceElem);
READ_NODE_FIELD(expr);
READ_OID_FIELD(infercollid);
READ_OID_FIELD(inferopclass);
READ_DONE();
}
/*
* _readTargetEntry
*/
static TargetEntry *
_readTargetEntry(void)
{
READ_LOCALS(TargetEntry);
READ_NODE_FIELD(expr);
READ_INT_FIELD(resno);
READ_STRING_FIELD(resname);
READ_UINT_FIELD(ressortgroupref);
READ_OID_FIELD(resorigtbl);
READ_INT_FIELD(resorigcol);
READ_BOOL_FIELD(resjunk);
READ_DONE();
}
/*
* _readRangeTblRef
*/
static RangeTblRef *
_readRangeTblRef(void)
{
READ_LOCALS(RangeTblRef);
READ_INT_FIELD(rtindex);
READ_DONE();
}
/*
* _readJoinExpr
*/
static JoinExpr *
_readJoinExpr(void)
{
READ_LOCALS(JoinExpr);
READ_ENUM_FIELD(jointype, JoinType);
READ_BOOL_FIELD(isNatural);
READ_NODE_FIELD(larg);
READ_NODE_FIELD(rarg);
READ_NODE_FIELD(usingClause);
READ_NODE_FIELD(quals);
READ_NODE_FIELD(alias);
READ_INT_FIELD(rtindex);
READ_DONE();
}
/*
* _readFromExpr
*/
static FromExpr *
_readFromExpr(void)
{
READ_LOCALS(FromExpr);
READ_NODE_FIELD(fromlist);
READ_NODE_FIELD(quals);
READ_DONE();
}
/*
* _readOnConflictExpr
*/
static OnConflictExpr *
_readOnConflictExpr(void)
{
READ_LOCALS(OnConflictExpr);
READ_ENUM_FIELD(action, OnConflictAction);
READ_NODE_FIELD(arbiterElems);
READ_NODE_FIELD(arbiterWhere);
READ_OID_FIELD(constraint);
READ_NODE_FIELD(onConflictSet);
READ_NODE_FIELD(onConflictWhere);
READ_INT_FIELD(exclRelIndex);
READ_NODE_FIELD(exclRelTlist);
READ_DONE();
}
/*
* Stuff from parsenodes.h.
*/
/*
* _readRangeTblEntry
*/
static RangeTblEntry *
_readRangeTblEntry(void)
{
READ_LOCALS(RangeTblEntry);
/* put alias + eref first to make dump more legible */
READ_NODE_FIELD(alias);
READ_NODE_FIELD(eref);
READ_ENUM_FIELD(rtekind, RTEKind);
switch (local_node->rtekind)
{
case RTE_RELATION:
READ_OID_FIELD(relid);
READ_CHAR_FIELD(relkind);
READ_NODE_FIELD(tablesample);
break;
case RTE_SUBQUERY:
READ_NODE_FIELD(subquery);
READ_BOOL_FIELD(security_barrier);
break;
case RTE_JOIN:
READ_ENUM_FIELD(jointype, JoinType);
READ_NODE_FIELD(joinaliasvars);
break;
case RTE_FUNCTION:
READ_NODE_FIELD(functions);
READ_BOOL_FIELD(funcordinality);
break;
case RTE_TABLEFUNC:
READ_NODE_FIELD(tablefunc);
break;
case RTE_VALUES:
READ_NODE_FIELD(values_lists);
READ_NODE_FIELD(coltypes);
READ_NODE_FIELD(coltypmods);
READ_NODE_FIELD(colcollations);
break;
case RTE_CTE:
READ_STRING_FIELD(ctename);
READ_UINT_FIELD(ctelevelsup);
READ_BOOL_FIELD(self_reference);
READ_NODE_FIELD(coltypes);
READ_NODE_FIELD(coltypmods);
READ_NODE_FIELD(colcollations);
break;
case RTE_NAMEDTUPLESTORE:
READ_STRING_FIELD(enrname);
READ_FLOAT_FIELD(enrtuples);
READ_OID_FIELD(relid);
READ_NODE_FIELD(coltypes);
READ_NODE_FIELD(coltypmods);
READ_NODE_FIELD(colcollations);
break;
default:
elog(ERROR, "unrecognized RTE kind: %d",
(int) local_node->rtekind);
break;
}
READ_BOOL_FIELD(lateral);
READ_BOOL_FIELD(inh);
READ_BOOL_FIELD(inFromCl);
READ_UINT_FIELD(requiredPerms);
READ_OID_FIELD(checkAsUser);
READ_BITMAPSET_FIELD(selectedCols);
READ_BITMAPSET_FIELD(insertedCols);
READ_BITMAPSET_FIELD(updatedCols);
READ_NODE_FIELD(securityQuals);
READ_DONE();
}
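/*
 * Note that the switch above must stay in sync with _outRangeTblEntry()
 * in outfuncs.c: both must agree on which fields are serialized for each
 * RTEKind, and in what order, or stored rules written by one side become
 * unreadable by the other.
 */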
/*
* _readRangeTblFunction
*/
static RangeTblFunction *
_readRangeTblFunction(void)
{
READ_LOCALS(RangeTblFunction);
READ_NODE_FIELD(funcexpr);
READ_INT_FIELD(funccolcount);
READ_NODE_FIELD(funccolnames);
READ_NODE_FIELD(funccoltypes);
READ_NODE_FIELD(funccoltypmods);
READ_NODE_FIELD(funccolcollations);
READ_BITMAPSET_FIELD(funcparams);
READ_DONE();
}
/*
* _readTableSampleClause
*/
static TableSampleClause *
_readTableSampleClause(void)
{
READ_LOCALS(TableSampleClause);
READ_OID_FIELD(tsmhandler);
READ_NODE_FIELD(args);
READ_NODE_FIELD(repeatable);
READ_DONE();
}
/*
* _readDefElem
*/
static DefElem *
_readDefElem(void)
{
READ_LOCALS(DefElem);
READ_STRING_FIELD(defnamespace);
READ_STRING_FIELD(defname);
READ_NODE_FIELD(arg);
READ_ENUM_FIELD(defaction, DefElemAction);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readPlannedStmt
*/
static PlannedStmt *
_readPlannedStmt(void)
{
READ_LOCALS(PlannedStmt);
READ_ENUM_FIELD(commandType, CmdType);
READ_UINT64_FIELD(queryId);
READ_BOOL_FIELD(hasReturning);
READ_BOOL_FIELD(hasModifyingCTE);
READ_BOOL_FIELD(canSetTag);
READ_BOOL_FIELD(transientPlan);
READ_BOOL_FIELD(dependsOnRole);
READ_BOOL_FIELD(parallelModeNeeded);
	READ_INT_FIELD(jitFlags);
READ_NODE_FIELD(planTree);
READ_NODE_FIELD(rtable);
READ_NODE_FIELD(resultRelations);
READ_NODE_FIELD(nonleafResultRelations);
READ_NODE_FIELD(rootResultRelations);
READ_NODE_FIELD(subplans);
READ_BITMAPSET_FIELD(rewindPlanIDs);
READ_NODE_FIELD(rowMarks);
READ_NODE_FIELD(relationOids);
READ_NODE_FIELD(invalItems);
READ_NODE_FIELD(paramExecTypes);
READ_NODE_FIELD(utilityStmt);
READ_LOCATION_FIELD(stmt_location);
READ_LOCATION_FIELD(stmt_len);
READ_DONE();
}
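/*
 * Note: readfuncs support for PlannedStmt and the plan nodes below exists
 * chiefly so that complete plans can be serialized and shipped to parallel
 * workers; stored rules contain only parse trees, never plans.
 */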
/*
* ReadCommonPlan
* Assign the basic stuff of all nodes that inherit from Plan
*/
static void
ReadCommonPlan(Plan *local_node)
{
READ_TEMP_LOCALS();
READ_FLOAT_FIELD(startup_cost);
READ_FLOAT_FIELD(total_cost);
READ_FLOAT_FIELD(plan_rows);
READ_INT_FIELD(plan_width);
READ_BOOL_FIELD(parallel_aware);
READ_BOOL_FIELD(parallel_safe);
READ_INT_FIELD(plan_node_id);
READ_NODE_FIELD(targetlist);
READ_NODE_FIELD(qual);
READ_NODE_FIELD(lefttree);
READ_NODE_FIELD(righttree);
READ_NODE_FIELD(initPlan);
READ_BITMAPSET_FIELD(extParam);
READ_BITMAPSET_FIELD(allParam);
}
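/*
 * Readers for Plan subtypes all follow this pattern: read the inherited
 * Plan fields via ReadCommonPlan(), then the node's own fields in
 * declaration order.  A reader for a new plan node would be a minimal
 * sketch like this (node type and field name are illustrative only, not
 * part of PostgreSQL):
 *
 *	static Frobnicate *
 *	_readFrobnicate(void)
 *	{
 *		READ_LOCALS(Frobnicate);
 *
 *		ReadCommonPlan(&local_node->plan);
 *
 *		READ_INT_FIELD(frobLevel);
 *
 *		READ_DONE();
 *	}
 */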
/*
* _readPlan
*/
static Plan *
_readPlan(void)
{
READ_LOCALS_NO_FIELDS(Plan);
ReadCommonPlan(local_node);
READ_DONE();
}
/*
* _readResult
*/
static Result *
_readResult(void)
{
READ_LOCALS(Result);
ReadCommonPlan(&local_node->plan);
READ_NODE_FIELD(resconstantqual);
READ_DONE();
}
/*
* _readProjectSet
*/
static ProjectSet *
_readProjectSet(void)
{
READ_LOCALS_NO_FIELDS(ProjectSet);
ReadCommonPlan(&local_node->plan);
READ_DONE();
}
/*
* _readModifyTable
*/
static ModifyTable *
_readModifyTable(void)
{
READ_LOCALS(ModifyTable);
ReadCommonPlan(&local_node->plan);
READ_ENUM_FIELD(operation, CmdType);
READ_BOOL_FIELD(canSetTag);
READ_UINT_FIELD(nominalRelation);
READ_NODE_FIELD(partitioned_rels);
READ_BOOL_FIELD(partColsUpdated);
READ_NODE_FIELD(resultRelations);
READ_INT_FIELD(resultRelIndex);
READ_INT_FIELD(rootResultRelIndex);
READ_NODE_FIELD(plans);
READ_NODE_FIELD(withCheckOptionLists);
READ_NODE_FIELD(returningLists);
READ_NODE_FIELD(fdwPrivLists);
READ_BITMAPSET_FIELD(fdwDirectModifyPlans);
READ_NODE_FIELD(rowMarks);
READ_INT_FIELD(epqParam);
READ_ENUM_FIELD(onConflictAction, OnConflictAction);
READ_NODE_FIELD(arbiterIndexes);
READ_NODE_FIELD(onConflictSet);
READ_NODE_FIELD(onConflictWhere);
READ_UINT_FIELD(exclRelRTI);
READ_NODE_FIELD(exclRelTlist);
READ_DONE();
}
/*
* _readAppend
*/
static Append *
_readAppend(void)
{
READ_LOCALS(Append);
ReadCommonPlan(&local_node->plan);
READ_NODE_FIELD(partitioned_rels);
READ_NODE_FIELD(appendplans);
READ_INT_FIELD(first_partial_plan);
READ_DONE();
}
/*
* _readMergeAppend
*/
static MergeAppend *
_readMergeAppend(void)
{
READ_LOCALS(MergeAppend);
ReadCommonPlan(&local_node->plan);
READ_NODE_FIELD(partitioned_rels);
READ_NODE_FIELD(mergeplans);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(sortColIdx, local_node->numCols);
READ_OID_ARRAY(sortOperators, local_node->numCols);
READ_OID_ARRAY(collations, local_node->numCols);
READ_BOOL_ARRAY(nullsFirst, local_node->numCols);
READ_DONE();
}
/*
* _readRecursiveUnion
*/
static RecursiveUnion *
_readRecursiveUnion(void)
{
READ_LOCALS(RecursiveUnion);
ReadCommonPlan(&local_node->plan);
READ_INT_FIELD(wtParam);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
READ_OID_ARRAY(dupOperators, local_node->numCols);
READ_LONG_FIELD(numGroups);
READ_DONE();
}
/*
* _readBitmapAnd
*/
static BitmapAnd *
_readBitmapAnd(void)
{
READ_LOCALS(BitmapAnd);
ReadCommonPlan(&local_node->plan);
READ_NODE_FIELD(bitmapplans);
READ_DONE();
}
/*
* _readBitmapOr
*/
static BitmapOr *
_readBitmapOr(void)
{
READ_LOCALS(BitmapOr);
ReadCommonPlan(&local_node->plan);
READ_BOOL_FIELD(isshared);
READ_NODE_FIELD(bitmapplans);
READ_DONE();
}
/*
* ReadCommonScan
* Assign the basic stuff of all nodes that inherit from Scan
*/
static void
ReadCommonScan(Scan *local_node)
{
READ_TEMP_LOCALS();
ReadCommonPlan(&local_node->plan);
READ_UINT_FIELD(scanrelid);
}
/*
* _readScan
*/
static Scan *
_readScan(void)
{
READ_LOCALS_NO_FIELDS(Scan);
ReadCommonScan(local_node);
READ_DONE();
}
/*
* _readSeqScan
*/
static SeqScan *
_readSeqScan(void)
{
READ_LOCALS_NO_FIELDS(SeqScan);
ReadCommonScan(local_node);
READ_DONE();
}
/*
* _readSampleScan
*/
static SampleScan *
_readSampleScan(void)
{
READ_LOCALS(SampleScan);
ReadCommonScan(&local_node->scan);
READ_NODE_FIELD(tablesample);
READ_DONE();
}
/*
* _readIndexScan
*/
static IndexScan *
_readIndexScan(void)
{
READ_LOCALS(IndexScan);
ReadCommonScan(&local_node->scan);
READ_OID_FIELD(indexid);
READ_NODE_FIELD(indexqual);
READ_NODE_FIELD(indexqualorig);
READ_NODE_FIELD(indexorderby);
READ_NODE_FIELD(indexorderbyorig);
READ_NODE_FIELD(indexorderbyops);
READ_ENUM_FIELD(indexorderdir, ScanDirection);
READ_DONE();
}
/*
* _readIndexOnlyScan
*/
static IndexOnlyScan *
_readIndexOnlyScan(void)
{
READ_LOCALS(IndexOnlyScan);
ReadCommonScan(&local_node->scan);
READ_OID_FIELD(indexid);
READ_NODE_FIELD(indexqual);
READ_NODE_FIELD(indexorderby);
READ_NODE_FIELD(indextlist);
READ_ENUM_FIELD(indexorderdir, ScanDirection);
READ_DONE();
}
/*
* _readBitmapIndexScan
*/
static BitmapIndexScan *
_readBitmapIndexScan(void)
{
READ_LOCALS(BitmapIndexScan);
ReadCommonScan(&local_node->scan);
READ_OID_FIELD(indexid);
READ_BOOL_FIELD(isshared);
READ_NODE_FIELD(indexqual);
READ_NODE_FIELD(indexqualorig);
READ_DONE();
}
/*
* _readBitmapHeapScan
*/
static BitmapHeapScan *
_readBitmapHeapScan(void)
{
READ_LOCALS(BitmapHeapScan);
ReadCommonScan(&local_node->scan);
READ_NODE_FIELD(bitmapqualorig);
READ_DONE();
}
/*
* _readTidScan
*/
static TidScan *
_readTidScan(void)
{
READ_LOCALS(TidScan);
ReadCommonScan(&local_node->scan);
READ_NODE_FIELD(tidquals);
READ_DONE();
}
/*
* _readSubqueryScan
*/
static SubqueryScan *
_readSubqueryScan(void)
{
READ_LOCALS(SubqueryScan);
ReadCommonScan(&local_node->scan);
READ_NODE_FIELD(subplan);
READ_DONE();
}
/*
* _readFunctionScan
*/
static FunctionScan *
_readFunctionScan(void)
{
READ_LOCALS(FunctionScan);
ReadCommonScan(&local_node->scan);
READ_NODE_FIELD(functions);
READ_BOOL_FIELD(funcordinality);
READ_DONE();
}
/*
* _readValuesScan
*/
static ValuesScan *
_readValuesScan(void)
{
READ_LOCALS(ValuesScan);
ReadCommonScan(&local_node->scan);
READ_NODE_FIELD(values_lists);
READ_DONE();
}
/*
* _readTableFuncScan
*/
static TableFuncScan *
_readTableFuncScan(void)
{
READ_LOCALS(TableFuncScan);
ReadCommonScan(&local_node->scan);
READ_NODE_FIELD(tablefunc);
READ_DONE();
}
/*
* _readCteScan
*/
static CteScan *
_readCteScan(void)
{
READ_LOCALS(CteScan);
ReadCommonScan(&local_node->scan);
READ_INT_FIELD(ctePlanId);
READ_INT_FIELD(cteParam);
READ_DONE();
}
/*
* _readWorkTableScan
*/
static WorkTableScan *
_readWorkTableScan(void)
{
READ_LOCALS(WorkTableScan);
ReadCommonScan(&local_node->scan);
READ_INT_FIELD(wtParam);
READ_DONE();
}
/*
* _readForeignScan
*/
static ForeignScan *
_readForeignScan(void)
{
READ_LOCALS(ForeignScan);
ReadCommonScan(&local_node->scan);
READ_ENUM_FIELD(operation, CmdType);
READ_OID_FIELD(fs_server);
READ_NODE_FIELD(fdw_exprs);
READ_NODE_FIELD(fdw_private);
READ_NODE_FIELD(fdw_scan_tlist);
READ_NODE_FIELD(fdw_recheck_quals);
READ_BITMAPSET_FIELD(fs_relids);
READ_BOOL_FIELD(fsSystemCol);
READ_DONE();
}
/*
* _readCustomScan
*/
static CustomScan *
_readCustomScan(void)
{
READ_LOCALS(CustomScan);
char *custom_name;
const CustomScanMethods *methods;
ReadCommonScan(&local_node->scan);
READ_UINT_FIELD(flags);
READ_NODE_FIELD(custom_plans);
READ_NODE_FIELD(custom_exprs);
READ_NODE_FIELD(custom_private);
READ_NODE_FIELD(custom_scan_tlist);
READ_BITMAPSET_FIELD(custom_relids);
/* Lookup CustomScanMethods by CustomName */
token = pg_strtok(&length); /* skip methods: */
token = pg_strtok(&length); /* CustomName */
custom_name = nullable_string(token, length);
methods = GetCustomScanMethods(custom_name, false);
local_node->methods = methods;
READ_DONE();
}
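/*
 * GetCustomScanMethods() is called with missing_ok = false, so reading a
 * serialized CustomScan fails unless the extension providing the named
 * methods has already registered them in this backend, normally from its
 * _PG_init().  A minimal registration sketch (names below are illustrative
 * only, not part of PostgreSQL):
 *
 *	static const CustomScanMethods frob_scan_methods = {
 *		.CustomName = "FrobScan",
 *		.CreateCustomScanState = CreateFrobScanState,
 *	};
 *
 *	RegisterCustomScanMethods(&frob_scan_methods);
 */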
/*
* ReadCommonJoin
* Assign the basic stuff of all nodes that inherit from Join
*/
static void
ReadCommonJoin(Join *local_node)
{
READ_TEMP_LOCALS();
ReadCommonPlan(&local_node->plan);
READ_ENUM_FIELD(jointype, JoinType);
READ_BOOL_FIELD(inner_unique);
READ_NODE_FIELD(joinqual);
}
/*
* _readJoin
*/
static Join *
_readJoin(void)
{
READ_LOCALS_NO_FIELDS(Join);
ReadCommonJoin(local_node);
READ_DONE();
}
/*
* _readNestLoop
*/
static NestLoop *
_readNestLoop(void)
{
READ_LOCALS(NestLoop);
ReadCommonJoin(&local_node->join);
READ_NODE_FIELD(nestParams);
READ_DONE();
}
/*
* _readMergeJoin
*/
static MergeJoin *
_readMergeJoin(void)
{
int numCols;
READ_LOCALS(MergeJoin);
ReadCommonJoin(&local_node->join);
READ_BOOL_FIELD(skip_mark_restore);
READ_NODE_FIELD(mergeclauses);
numCols = list_length(local_node->mergeclauses);
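	/*
	 * The serialized form carries no explicit length for the four arrays
	 * read below; their length is implied by the number of mergeclauses,
	 * which is why that list must be read first.
	 */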
READ_OID_ARRAY(mergeFamilies, numCols);
READ_OID_ARRAY(mergeCollations, numCols);
READ_INT_ARRAY(mergeStrategies, numCols);
READ_BOOL_ARRAY(mergeNullsFirst, numCols);
READ_DONE();
}
/*
* _readHashJoin
*/
static HashJoin *
_readHashJoin(void)
{
READ_LOCALS(HashJoin);
ReadCommonJoin(&local_node->join);
READ_NODE_FIELD(hashclauses);
READ_DONE();
}
/*
* _readMaterial
*/
static Material *
_readMaterial(void)
{
READ_LOCALS_NO_FIELDS(Material);
ReadCommonPlan(&local_node->plan);
READ_DONE();
}
/*
* _readSort
*/
static Sort *
_readSort(void)
{
READ_LOCALS(Sort);
ReadCommonPlan(&local_node->plan);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(sortColIdx, local_node->numCols);
READ_OID_ARRAY(sortOperators, local_node->numCols);
READ_OID_ARRAY(collations, local_node->numCols);
READ_BOOL_ARRAY(nullsFirst, local_node->numCols);
READ_DONE();
}
/*
* _readGroup
*/
static Group *
_readGroup(void)
{
READ_LOCALS(Group);
ReadCommonPlan(&local_node->plan);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
READ_OID_ARRAY(grpOperators, local_node->numCols);
READ_DONE();
}
/*
* _readAgg
*/
static Agg *
_readAgg(void)
{
READ_LOCALS(Agg);
ReadCommonPlan(&local_node->plan);
READ_ENUM_FIELD(aggstrategy, AggStrategy);
READ_ENUM_FIELD(aggsplit, AggSplit);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
READ_OID_ARRAY(grpOperators, local_node->numCols);
READ_LONG_FIELD(numGroups);
READ_BITMAPSET_FIELD(aggParams);
READ_NODE_FIELD(groupingSets);
READ_NODE_FIELD(chain);
READ_DONE();
}
/*
* _readWindowAgg
*/
static WindowAgg *
_readWindowAgg(void)
{
READ_LOCALS(WindowAgg);
ReadCommonPlan(&local_node->plan);
READ_UINT_FIELD(winref);
READ_INT_FIELD(partNumCols);
READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols);
READ_OID_ARRAY(partOperators, local_node->partNumCols);
READ_INT_FIELD(ordNumCols);
READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols);
READ_OID_ARRAY(ordOperators, local_node->ordNumCols);
READ_INT_FIELD(frameOptions);
READ_NODE_FIELD(startOffset);
READ_NODE_FIELD(endOffset);
READ_OID_FIELD(startInRangeFunc);
READ_OID_FIELD(endInRangeFunc);
READ_OID_FIELD(inRangeColl);
READ_BOOL_FIELD(inRangeAsc);
READ_BOOL_FIELD(inRangeNullsFirst);
READ_DONE();
}
/*
* _readUnique
*/
static Unique *
_readUnique(void)
{
READ_LOCALS(Unique);
ReadCommonPlan(&local_node->plan);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols);
READ_OID_ARRAY(uniqOperators, local_node->numCols);
READ_DONE();
}
/*
* _readGather
*/
static Gather *
_readGather(void)
{
READ_LOCALS(Gather);
ReadCommonPlan(&local_node->plan);
READ_INT_FIELD(num_workers);
READ_INT_FIELD(rescan_param);
READ_BOOL_FIELD(single_copy);
READ_BOOL_FIELD(invisible);
READ_BITMAPSET_FIELD(initParam);
READ_DONE();
}
/*
* _readGatherMerge
*/
static GatherMerge *
_readGatherMerge(void)
{
READ_LOCALS(GatherMerge);
ReadCommonPlan(&local_node->plan);
READ_INT_FIELD(num_workers);
READ_INT_FIELD(rescan_param);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(sortColIdx, local_node->numCols);
READ_OID_ARRAY(sortOperators, local_node->numCols);
READ_OID_ARRAY(collations, local_node->numCols);
READ_BOOL_ARRAY(nullsFirst, local_node->numCols);
READ_BITMAPSET_FIELD(initParam);
READ_DONE();
}
/*
* _readHash
*/
static Hash *
_readHash(void)
{
READ_LOCALS(Hash);
ReadCommonPlan(&local_node->plan);
READ_OID_FIELD(skewTable);
READ_INT_FIELD(skewColumn);
READ_BOOL_FIELD(skewInherit);
READ_FLOAT_FIELD(rows_total);
READ_DONE();
}
/*
* _readSetOp
*/
static SetOp *
_readSetOp(void)
{
READ_LOCALS(SetOp);
ReadCommonPlan(&local_node->plan);
READ_ENUM_FIELD(cmd, SetOpCmd);
READ_ENUM_FIELD(strategy, SetOpStrategy);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
READ_OID_ARRAY(dupOperators, local_node->numCols);
READ_INT_FIELD(flagColIdx);
READ_INT_FIELD(firstFlag);
READ_LONG_FIELD(numGroups);
READ_DONE();
}
/*
* _readLockRows
*/
static LockRows *
_readLockRows(void)
{
READ_LOCALS(LockRows);
ReadCommonPlan(&local_node->plan);
READ_NODE_FIELD(rowMarks);
READ_INT_FIELD(epqParam);
READ_DONE();
}
/*
* _readLimit
*/
static Limit *
_readLimit(void)
{
READ_LOCALS(Limit);
ReadCommonPlan(&local_node->plan);
READ_NODE_FIELD(limitOffset);
READ_NODE_FIELD(limitCount);
READ_DONE();
}
/*
* _readNestLoopParam
*/
static NestLoopParam *
_readNestLoopParam(void)
{
READ_LOCALS(NestLoopParam);
READ_INT_FIELD(paramno);
READ_NODE_FIELD(paramval);
READ_DONE();
}
/*
* _readPlanRowMark
*/
static PlanRowMark *
_readPlanRowMark(void)
{
READ_LOCALS(PlanRowMark);
READ_UINT_FIELD(rti);
READ_UINT_FIELD(prti);
READ_UINT_FIELD(rowmarkId);
READ_ENUM_FIELD(markType, RowMarkType);
READ_INT_FIELD(allMarkTypes);
READ_ENUM_FIELD(strength, LockClauseStrength);
READ_ENUM_FIELD(waitPolicy, LockWaitPolicy);
READ_BOOL_FIELD(isParent);
READ_DONE();
}
/*
* _readPlanInvalItem
*/
static PlanInvalItem *
_readPlanInvalItem(void)
{
READ_LOCALS(PlanInvalItem);
READ_INT_FIELD(cacheId);
READ_UINT_FIELD(hashValue);
READ_DONE();
}
/*
* _readSubPlan
*/
static SubPlan *
_readSubPlan(void)
{
READ_LOCALS(SubPlan);
READ_ENUM_FIELD(subLinkType, SubLinkType);
READ_NODE_FIELD(testexpr);
READ_NODE_FIELD(paramIds);
READ_INT_FIELD(plan_id);
READ_STRING_FIELD(plan_name);
READ_OID_FIELD(firstColType);
READ_INT_FIELD(firstColTypmod);
READ_OID_FIELD(firstColCollation);
READ_BOOL_FIELD(useHashTable);
READ_BOOL_FIELD(unknownEqFalse);
READ_BOOL_FIELD(parallel_safe);
READ_NODE_FIELD(setParam);
READ_NODE_FIELD(parParam);
READ_NODE_FIELD(args);
READ_FLOAT_FIELD(startup_cost);
READ_FLOAT_FIELD(per_call_cost);
READ_DONE();
}
/*
* _readAlternativeSubPlan
*/
static AlternativeSubPlan *
_readAlternativeSubPlan(void)
{
READ_LOCALS(AlternativeSubPlan);
READ_NODE_FIELD(subplans);
READ_DONE();
}
/*
* _readExtensibleNode
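*
* Note: an extension-defined node can be read back only in a backend where
* the extension has registered its ExtensibleNodeMethods (normally via
* RegisterExtensibleNodeMethods); otherwise the lookup below errors out.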
*/
static ExtensibleNode *
_readExtensibleNode(void)
{
const ExtensibleNodeMethods *methods;
ExtensibleNode *local_node;
const char *extnodename;
READ_TEMP_LOCALS();
token = pg_strtok(&length); /* skip :extnodename */
token = pg_strtok(&length); /* get extnodename */
extnodename = nullable_string(token, length);
if (!extnodename)
elog(ERROR, "extnodename has to be supplied");
methods = GetExtensibleNodeMethods(extnodename, false);
local_node = (ExtensibleNode *) newNode(methods->node_size,
T_ExtensibleNode);
local_node->extnodename = extnodename;
/* deserialize the private fields */
methods->nodeRead(local_node);
READ_DONE();
}
/*
* _readPartitionBoundSpec
*/
static PartitionBoundSpec *
_readPartitionBoundSpec(void)
{
READ_LOCALS(PartitionBoundSpec);
READ_CHAR_FIELD(strategy);
READ_BOOL_FIELD(is_default);
READ_INT_FIELD(modulus);
READ_INT_FIELD(remainder);
READ_NODE_FIELD(listdatums);
READ_NODE_FIELD(lowerdatums);
READ_NODE_FIELD(upperdatums);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* _readPartitionRangeDatum
*/
static PartitionRangeDatum *
_readPartitionRangeDatum(void)
{
READ_LOCALS(PartitionRangeDatum);
READ_ENUM_FIELD(kind, PartitionRangeDatumKind);
READ_NODE_FIELD(value);
READ_LOCATION_FIELD(location);
READ_DONE();
}
/*
* parseNodeString
*
* Given a character string representing a node tree, parseNodeString creates
* the internal node structure.
*
* The string to be read must already have been loaded into pg_strtok().
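*
* Callers normally do not invoke this directly: stringToNode() in read.c
* loads the string into pg_strtok() and nodeRead() dispatches here on
* seeing a "{". A sketch of typical use (the variable name is illustrative):
*
*	Node *node = stringToNode(stored_rule_string);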
*/
Node *
parseNodeString(void)
{
void *return_value;
READ_TEMP_LOCALS();
token = pg_strtok(&length);
#define MATCH(tokname, namelen) \
(length == namelen && memcmp(token, tokname, namelen) == 0)
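/*
* MATCH compares the token length before its bytes, so most of the
* non-matching entries in the chain below are rejected without a memcmp;
* e.g. MATCH("QUERY", 5) succeeds only for the exact token "QUERY".
*/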
if (MATCH("QUERY", 5))
return_value = _readQuery();
else if (MATCH("WITHCHECKOPTION", 15))
return_value = _readWithCheckOption();
else if (MATCH("SORTGROUPCLAUSE", 15))
return_value = _readSortGroupClause();
else if (MATCH("GROUPINGSET", 11))
return_value = _readGroupingSet();
else if (MATCH("WINDOWCLAUSE", 12))
return_value = _readWindowClause();
else if (MATCH("ROWMARKCLAUSE", 13))
return_value = _readRowMarkClause();
else if (MATCH("COMMONTABLEEXPR", 15))
return_value = _readCommonTableExpr();
else if (MATCH("SETOPERATIONSTMT", 16))
return_value = _readSetOperationStmt();
else if (MATCH("ALIAS", 5))
return_value = _readAlias();
else if (MATCH("RANGEVAR", 8))
return_value = _readRangeVar();
else if (MATCH("INTOCLAUSE", 10))
return_value = _readIntoClause();
else if (MATCH("TABLEFUNC", 9))
return_value = _readTableFunc();
else if (MATCH("VAR", 3))
return_value = _readVar();
else if (MATCH("CONST", 5))
return_value = _readConst();
else if (MATCH("PARAM", 5))
return_value = _readParam();
else if (MATCH("AGGREF", 6))
return_value = _readAggref();
else if (MATCH("GROUPINGFUNC", 12))
return_value = _readGroupingFunc();
else if (MATCH("WINDOWFUNC", 10))
return_value = _readWindowFunc();
else if (MATCH("ARRAYREF", 8))
return_value = _readArrayRef();
else if (MATCH("FUNCEXPR", 8))
return_value = _readFuncExpr();
else if (MATCH("NAMEDARGEXPR", 12))
return_value = _readNamedArgExpr();
else if (MATCH("OPEXPR", 6))
return_value = _readOpExpr();
else if (MATCH("DISTINCTEXPR", 12))
return_value = _readDistinctExpr();
else if (MATCH("NULLIFEXPR", 10))
return_value = _readNullIfExpr();
else if (MATCH("SCALARARRAYOPEXPR", 17))
return_value = _readScalarArrayOpExpr();
else if (MATCH("BOOLEXPR", 8))
return_value = _readBoolExpr();
else if (MATCH("SUBLINK", 7))
return_value = _readSubLink();
else if (MATCH("FIELDSELECT", 11))
return_value = _readFieldSelect();
else if (MATCH("FIELDSTORE", 10))
return_value = _readFieldStore();
else if (MATCH("RELABELTYPE", 11))
return_value = _readRelabelType();
else if (MATCH("COERCEVIAIO", 11))
return_value = _readCoerceViaIO();
else if (MATCH("ARRAYCOERCEEXPR", 15))
return_value = _readArrayCoerceExpr();
else if (MATCH("CONVERTROWTYPEEXPR", 18))
return_value = _readConvertRowtypeExpr();
else if (MATCH("COLLATE", 7))
return_value = _readCollateExpr();
else if (MATCH("CASE", 4))
return_value = _readCaseExpr();
else if (MATCH("WHEN", 4))
return_value = _readCaseWhen();
else if (MATCH("CASETESTEXPR", 12))
return_value = _readCaseTestExpr();
else if (MATCH("ARRAY", 5))
return_value = _readArrayExpr();
else if (MATCH("ROW", 3))
return_value = _readRowExpr();
else if (MATCH("ROWCOMPARE", 10))
return_value = _readRowCompareExpr();
else if (MATCH("COALESCE", 8))
return_value = _readCoalesceExpr();
else if (MATCH("MINMAX", 6))
return_value = _readMinMaxExpr();
else if (MATCH("SQLVALUEFUNCTION", 16))
return_value = _readSQLValueFunction();
else if (MATCH("XMLEXPR", 7))
return_value = _readXmlExpr();
else if (MATCH("NULLTEST", 8))
return_value = _readNullTest();
else if (MATCH("BOOLEANTEST", 11))
return_value = _readBooleanTest();
else if (MATCH("COERCETODOMAIN", 14))
return_value = _readCoerceToDomain();
else if (MATCH("COERCETODOMAINVALUE", 19))
return_value = _readCoerceToDomainValue();
else if (MATCH("SETTODEFAULT", 12))
return_value = _readSetToDefault();
else if (MATCH("CURRENTOFEXPR", 13))
return_value = _readCurrentOfExpr();
else if (MATCH("NEXTVALUEEXPR", 13))
return_value = _readNextValueExpr();
else if (MATCH("INFERENCEELEM", 13))
return_value = _readInferenceElem();
else if (MATCH("TARGETENTRY", 11))
return_value = _readTargetEntry();
else if (MATCH("RANGETBLREF", 11))
return_value = _readRangeTblRef();
else if (MATCH("JOINEXPR", 8))
return_value = _readJoinExpr();
else if (MATCH("FROMEXPR", 8))
return_value = _readFromExpr();
else if (MATCH("ONCONFLICTEXPR", 14))
return_value = _readOnConflictExpr();
else if (MATCH("RTE", 3))
return_value = _readRangeTblEntry();
else if (MATCH("RANGETBLFUNCTION", 16))
return_value = _readRangeTblFunction();
else if (MATCH("TABLESAMPLECLAUSE", 17))
return_value = _readTableSampleClause();
else if (MATCH("NOTIFY", 6))
return_value = _readNotifyStmt();
else if (MATCH("DEFELEM", 7))
return_value = _readDefElem();
else if (MATCH("DECLARECURSOR", 13))
return_value = _readDeclareCursorStmt();
else if (MATCH("PLANNEDSTMT", 11))
return_value = _readPlannedStmt();
else if (MATCH("PLAN", 4))
return_value = _readPlan();
else if (MATCH("RESULT", 6))
return_value = _readResult();
else if (MATCH("PROJECTSET", 10))
return_value = _readProjectSet();
else if (MATCH("MODIFYTABLE", 11))
return_value = _readModifyTable();
else if (MATCH("APPEND", 6))
return_value = _readAppend();
else if (MATCH("MERGEAPPEND", 11))
return_value = _readMergeAppend();
else if (MATCH("RECURSIVEUNION", 14))
return_value = _readRecursiveUnion();
else if (MATCH("BITMAPAND", 9))
return_value = _readBitmapAnd();
else if (MATCH("BITMAPOR", 8))
return_value = _readBitmapOr();
else if (MATCH("SCAN", 4))
return_value = _readScan();
else if (MATCH("SEQSCAN", 7))
return_value = _readSeqScan();
else if (MATCH("SAMPLESCAN", 10))
return_value = _readSampleScan();
else if (MATCH("INDEXSCAN", 9))
return_value = _readIndexScan();
else if (MATCH("INDEXONLYSCAN", 13))
return_value = _readIndexOnlyScan();
else if (MATCH("BITMAPINDEXSCAN", 15))
return_value = _readBitmapIndexScan();
else if (MATCH("BITMAPHEAPSCAN", 14))
return_value = _readBitmapHeapScan();
else if (MATCH("TIDSCAN", 7))
return_value = _readTidScan();
else if (MATCH("SUBQUERYSCAN", 12))
return_value = _readSubqueryScan();
else if (MATCH("FUNCTIONSCAN", 12))
return_value = _readFunctionScan();
else if (MATCH("VALUESSCAN", 10))
return_value = _readValuesScan();
else if (MATCH("TABLEFUNCSCAN", 13))
return_value = _readTableFuncScan();
else if (MATCH("CTESCAN", 7))
return_value = _readCteScan();
else if (MATCH("WORKTABLESCAN", 13))
return_value = _readWorkTableScan();
else if (MATCH("FOREIGNSCAN", 11))
return_value = _readForeignScan();
else if (MATCH("CUSTOMSCAN", 10))
return_value = _readCustomScan();
else if (MATCH("JOIN", 4))
return_value = _readJoin();
else if (MATCH("NESTLOOP", 8))
return_value = _readNestLoop();
else if (MATCH("MERGEJOIN", 9))
return_value = _readMergeJoin();
else if (MATCH("HASHJOIN", 8))
return_value = _readHashJoin();
else if (MATCH("MATERIAL", 8))
return_value = _readMaterial();
else if (MATCH("SORT", 4))
return_value = _readSort();
else if (MATCH("GROUP", 5))
return_value = _readGroup();
else if (MATCH("AGG", 3))
return_value = _readAgg();
else if (MATCH("WINDOWAGG", 9))
return_value = _readWindowAgg();
else if (MATCH("UNIQUE", 6))
return_value = _readUnique();
else if (MATCH("GATHER", 6))
return_value = _readGather();
else if (MATCH("GATHERMERGE", 11))
return_value = _readGatherMerge();
else if (MATCH("HASH", 4))
return_value = _readHash();
else if (MATCH("SETOP", 5))
return_value = _readSetOp();
else if (MATCH("LOCKROWS", 8))
return_value = _readLockRows();
else if (MATCH("LIMIT", 5))
return_value = _readLimit();
else if (MATCH("NESTLOOPPARAM", 13))
return_value = _readNestLoopParam();
else if (MATCH("PLANROWMARK", 11))
return_value = _readPlanRowMark();
else if (MATCH("PLANINVALITEM", 13))
return_value = _readPlanInvalItem();
else if (MATCH("SUBPLAN", 7))
return_value = _readSubPlan();
else if (MATCH("ALTERNATIVESUBPLAN", 18))
return_value = _readAlternativeSubPlan();
else if (MATCH("EXTENSIBLENODE", 14))
return_value = _readExtensibleNode();
else if (MATCH("PARTITIONBOUNDSPEC", 18))
return_value = _readPartitionBoundSpec();
else if (MATCH("PARTITIONRANGEDATUM", 19))
return_value = _readPartitionRangeDatum();
else
{
elog(ERROR, "badly formatted node string \"%.32s\"...", token);
return_value = NULL; /* keep compiler quiet */
}
return (Node *) return_value;
}
/*
* readDatum
*
* Given a string representation of a constant, recreate the appropriate
* Datum. The string representation embeds length info, but not byValue,
* so we must be told that.
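*
* For example, on a 64-bit little-endian build outfuncs.c would write a
* pass-by-value int4 Datum holding 42 as
*	4 [ 42 0 0 0 0 0 0 0 ]
* (declared length 4, but all sizeof(Datum) bytes emitted), and that is
* exactly the shape consumed below.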
*/
Datum
readDatum(bool typbyval)
{
Size length,
i;
int tokenLength;
char *token;
Datum res;
char *s;
/*
* read the actual length of the value
*/
token = pg_strtok(&tokenLength);
length = atoui(token);
token = pg_strtok(&tokenLength); /* read the '[' */
if (token == NULL || token[0] != '[')
elog(ERROR, "expected \"[\" to start datum, but got \"%s\"; length = %zu",
token ? (const char *) token : "[NULL]", length);
if (typbyval)
{
if (length > (Size) sizeof(Datum))
elog(ERROR, "byval datum but length = %zu", length);
res = (Datum) 0;
s = (char *) (&res);
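/*
* The writer emits all sizeof(Datum) bytes for a by-value datum, not just
* "length" of them, so consume exactly sizeof(Datum) tokens here.
*/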
for (i = 0; i < (Size) sizeof(Datum); i++)
{
token = pg_strtok(&tokenLength);
s[i] = (char) atoi(token);
}
}
else if (length <= 0)
res = (Datum) NULL;
else
{
s = (char *) palloc(length);
for (i = 0; i < length; i++)
{
token = pg_strtok(&tokenLength);
s[i] = (char) atoi(token);
}
res = PointerGetDatum(s);
}
token = pg_strtok(&tokenLength); /* read the ']' */
if (token == NULL || token[0] != ']')
elog(ERROR, "expected \"]\" to end datum, but got \"%s\"; length = %zu",
token ? (const char *) token : "[NULL]", length);
return res;
}
/*
* readAttrNumberCols
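*
* This and the three readers below back the READ_ATTRNUMBER_ARRAY,
* READ_OID_ARRAY, READ_INT_ARRAY and READ_BOOL_ARRAY macros: each consumes
* numCols whitespace-separated tokens written out by the matching
* outfuncs.c code.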
*/
AttrNumber *
readAttrNumberCols(int numCols)
{
int tokenLength,
i;
char *token;
AttrNumber *attr_vals;
if (numCols <= 0)
return NULL;
attr_vals = (AttrNumber *) palloc(numCols * sizeof(AttrNumber));
for (i = 0; i < numCols; i++)
{
token = pg_strtok(&tokenLength);
attr_vals[i] = atoi(token);
}
return attr_vals;
}
/*
* readOidCols
*/
Oid *
readOidCols(int numCols)
{
int tokenLength,
i;
char *token;
Oid *oid_vals;
if (numCols <= 0)
return NULL;
oid_vals = (Oid *) palloc(numCols * sizeof(Oid));
for (i = 0; i < numCols; i++)
{
token = pg_strtok(&tokenLength);
oid_vals[i] = atooid(token);
}
return oid_vals;
}
/*
* readIntCols
*/
int *
readIntCols(int numCols)
{
int tokenLength,
i;
char *token;
int *int_vals;
if (numCols <= 0)
return NULL;
int_vals = (int *) palloc(numCols * sizeof(int));
for (i = 0; i < numCols; i++)
{
token = pg_strtok(&tokenLength);
int_vals[i] = atoi(token);
}
return int_vals;
}
/*
* readBoolCols
*/
bool *
readBoolCols(int numCols)
{
int tokenLength,
i;
char *token;
bool *bool_vals;
if (numCols <= 0)
return NULL;
bool_vals = (bool *) palloc(numCols * sizeof(bool));
for (i = 0; i < numCols; i++)
{
token = pg_strtok(&tokenLength);
bool_vals[i] = strtobool(token);
}
return bool_vals;
}