2002-11-25 19:12:12 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* outfuncs.c
|
2002-11-25 19:12:12 +01:00
|
|
|
* Output functions for Postgres tree nodes.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2019-01-02 18:44:25 +01:00
|
|
|
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2002-11-25 19:12:12 +01:00
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/nodes/outfuncs.c
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* NOTES
|
2002-11-25 19:12:12 +01:00
|
|
|
* Every node type that can appear in stored rules' parsetrees *must*
|
|
|
|
* have an output function defined here (as well as an input function
|
2015-09-23 17:51:50 +02:00
|
|
|
* in readfuncs.c). In addition, plan nodes should have input and
|
|
|
|
* output functions so that they can be sent to parallel workers.
|
2018-09-16 19:02:47 +02:00
|
|
|
*
|
2015-09-23 17:51:50 +02:00
|
|
|
* For use in debugging, we also provide output functions for nodes
|
2018-09-16 19:02:47 +02:00
|
|
|
* that appear in raw parsetrees and planner Paths. These node types
|
|
|
|
* need not have input functions. Output support for raw parsetrees
|
|
|
|
* is somewhat incomplete, too; in particular, utility statements are
|
|
|
|
* almost entirely unsupported. We try to support everything that can
|
|
|
|
* appear in a raw SELECT, though.
|
2002-11-25 19:12:12 +01:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1999-07-17 22:18:55 +02:00
|
|
|
#include "postgres.h"
|
2000-01-14 01:53:21 +01:00
|
|
|
|
2000-10-31 11:22:13 +01:00
|
|
|
#include <ctype.h>
|
|
|
|
|
2012-03-05 22:15:59 +01:00
|
|
|
#include "lib/stringinfo.h"
|
2018-12-10 17:12:43 +01:00
|
|
|
#include "miscadmin.h"
|
Introduce extensible node types.
An extensible node is always tagged T_Extensible, but the extnodename
field identifies it more specifically; it may also include arbitrary
private data. Extensible nodes can be copied, tested for equality,
serialized, and deserialized, but the core system doesn't know
anything about them otherwise. Some extensions may find it useful to
include these nodes in fdw_private or custom_private lists in lieu of
arm-wrestling their data into a format that the core code can
understand.
Along the way, so as not to burden the authors of such extensible
node types too much, expose the functions for writing serialized
tokens, and for serializing and deserializing bitmapsets.
KaiGai Kohei, per a design suggested by me. Reviewed by Andres Freund
and by me, and further edited by me.
2016-02-12 15:31:16 +01:00
|
|
|
#include "nodes/extensible.h"
|
2019-01-29 22:49:25 +01:00
|
|
|
#include "nodes/pathnodes.h"
|
2012-03-05 22:15:59 +01:00
|
|
|
#include "nodes/plannodes.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "utils/datum.h"
|
2016-06-18 21:22:34 +02:00
|
|
|
#include "utils/rel.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2017-06-22 04:57:23 +02:00
|
|
|
static void outChar(StringInfo str, char c);
|
|
|
|
|
1998-05-10 01:46:35 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * Macros to simplify output of different kinds of fields.  Use these
 * wherever possible to reduce the chance for silly typos.  Note that these
 * hard-wire conventions about the names of the local variables in an Out
 * routine: the node being printed is "node" and the output buffer is "str".
 */

/* Write the label for the node type */
#define WRITE_NODE_TYPE(nodelabel) \
	appendStringInfoString(str, nodelabel)

/* Write an integer field (anything written as ":fldname %d") */
#define WRITE_INT_FIELD(fldname) \
	appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname)

/* Write an unsigned integer field (anything written as ":fldname %u") */
#define WRITE_UINT_FIELD(fldname) \
	appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)

/* Write an unsigned integer field (anything written with UINT64_FORMAT) */
#define WRITE_UINT64_FIELD(fldname) \
	appendStringInfo(str, " :" CppAsString(fldname) " " UINT64_FORMAT, \
					 node->fldname)

/* Write an OID field (don't hard-wire assumption that OID is same as uint) */
#define WRITE_OID_FIELD(fldname) \
	appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)

/* Write a long-integer field */
#define WRITE_LONG_FIELD(fldname) \
	appendStringInfo(str, " :" CppAsString(fldname) " %ld", node->fldname)

/* Write a char field (ie, one ascii character) */
#define WRITE_CHAR_FIELD(fldname) \
	(appendStringInfo(str, " :" CppAsString(fldname) " "), \
	 outChar(str, node->fldname))

/* Write an enumerated-type field as an integer code */
#define WRITE_ENUM_FIELD(fldname, enumtype) \
	appendStringInfo(str, " :" CppAsString(fldname) " %d", \
					 (int) node->fldname)

/* Write a float field --- caller must give format to define precision */
#define WRITE_FLOAT_FIELD(fldname,format) \
	appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname)

/* Write a boolean field */
#define WRITE_BOOL_FIELD(fldname) \
	appendStringInfo(str, " :" CppAsString(fldname) " %s", \
					 booltostr(node->fldname))

/* Write a character-string (possibly NULL) field */
#define WRITE_STRING_FIELD(fldname) \
	(appendStringInfoString(str, " :" CppAsString(fldname) " "), \
	 outToken(str, node->fldname))

/* Write a parse location field (actually same as INT case) */
#define WRITE_LOCATION_FIELD(fldname) \
	appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname)

/* Write a Node field */
#define WRITE_NODE_FIELD(fldname) \
	(appendStringInfoString(str, " :" CppAsString(fldname) " "), \
	 outNode(str, node->fldname))

/* Write a bitmapset field */
#define WRITE_BITMAPSET_FIELD(fldname) \
	(appendStringInfoString(str, " :" CppAsString(fldname) " "), \
	 outBitmapset(str, node->fldname))

/* Write a C array of AttrNumber, "len" elements long */
#define WRITE_ATTRNUMBER_ARRAY(fldname, len) \
	do { \
		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
		for (int i = 0; i < len; i++) \
			appendStringInfo(str, " %d", node->fldname[i]); \
	} while(0)

/* Write a C array of Oid, "len" elements long */
#define WRITE_OID_ARRAY(fldname, len) \
	do { \
		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
		for (int i = 0; i < len; i++) \
			appendStringInfo(str, " %u", node->fldname[i]); \
	} while(0)

/* Write a C array of int, "len" elements long */
#define WRITE_INT_ARRAY(fldname, len) \
	do { \
		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
		for (int i = 0; i < len; i++) \
			appendStringInfo(str, " %d", node->fldname[i]); \
	} while(0)

/* Write a C array of bool, "len" elements long */
#define WRITE_BOOL_ARRAY(fldname, len) \
	do { \
		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
		for (int i = 0; i < len; i++) \
			appendStringInfo(str, " %s", booltostr(node->fldname[i])); \
	} while(0)


/* Render a bool value as the token "true" or "false" */
#define booltostr(x)  ((x) ? "true" : "false")
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
|
2000-01-14 01:53:21 +01:00
|
|
|
/*
 * outToken
 *	  Convert an ordinary string (eg, an identifier) into a form that
 *	  will be decoded back to a plain token by read.c's functions.
 *
 *	  If a null or empty string is given, it is encoded as "<>".
 */
void
outToken(StringInfo str, const char *s)
{
	if (s == NULL || *s == '\0')
	{
		/* Empty-token representation; read.c decodes "<>" back to NULL */
		appendStringInfoString(str, "<>");
		return;
	}

	/*
	 * Look for characters or patterns that are treated specially by read.c
	 * (either in pg_strtok() or in nodeRead()), and therefore need a
	 * protective backslash.
	 */
	/* These characters only need to be quoted at the start of the string */
	if (*s == '<' ||
		*s == '"' ||
		isdigit((unsigned char) *s) ||
		((*s == '+' || *s == '-') &&
		 (isdigit((unsigned char) s[1]) || s[1] == '.')))
		appendStringInfoChar(str, '\\');
	while (*s)
	{
		/* These chars must be backslashed anywhere in the string */
		if (*s == ' ' || *s == '\n' || *s == '\t' ||
			*s == '(' || *s == ')' || *s == '{' || *s == '}' ||
			*s == '\\')
			appendStringInfoChar(str, '\\');
		appendStringInfoChar(str, *s++);
	}
}
|
1999-08-31 03:28:37 +02:00
|
|
|
|
2017-06-22 04:57:23 +02:00
|
|
|
/*
|
|
|
|
* Convert one char. Goes through outToken() so that special characters are
|
|
|
|
* escaped.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
outChar(StringInfo str, char c)
|
|
|
|
{
|
|
|
|
char in[2];
|
|
|
|
|
|
|
|
in[0] = c;
|
|
|
|
in[1] = '\0';
|
|
|
|
|
|
|
|
outToken(str, in);
|
|
|
|
}
|
|
|
|
|
1996-11-10 04:06:38 +01:00
|
|
|
/*
 * Serialize a List, which may hold nodes (List), integers (IntList), or
 * OIDs (OidList).  Output looks like "(...)", with integer and OID lists
 * distinguished by an 'i' or 'o' immediately after the open paren.
 */
static void
_outList(StringInfo str, const List *node)
{
	const ListCell *lc;

	appendStringInfoChar(str, '(');

	/* Type-prefix character lets read.c reconstruct the right list kind */
	if (IsA(node, IntList))
		appendStringInfoChar(str, 'i');
	else if (IsA(node, OidList))
		appendStringInfoChar(str, 'o');

	foreach(lc, node)
	{
		/*
		 * For the sake of backward compatibility, we emit a slightly
		 * different whitespace format for lists of nodes vs. other types of
		 * lists. XXX: is this necessary?
		 */
		if (IsA(node, List))
		{
			outNode(str, lfirst(lc));
			if (lnext(lc))
				appendStringInfoChar(str, ' ');
		}
		else if (IsA(node, IntList))
			appendStringInfo(str, " %d", lfirst_int(lc));
		else if (IsA(node, OidList))
			appendStringInfo(str, " %u", lfirst_oid(lc));
		else
			elog(ERROR, "unrecognized list node type: %d",
				 (int) node->type);
	}

	appendStringInfoChar(str, ')');
}
|
|
|
|
|
2003-02-08 21:20:55 +01:00
|
|
|
/*
 * outBitmapset -
 *	   converts a bitmap set of integers
 *
 * Note: the output format is "(b int int ...)", similar to an integer List.
 */
void
outBitmapset(StringInfo str, const Bitmapset *bms)
{
	int			x;

	appendStringInfoChar(str, '(');
	appendStringInfoChar(str, 'b');
	/* bms_next_member yields members in ascending order; -1 starts the scan */
	x = -1;
	while ((x = bms_next_member(bms, x)) >= 0)
		appendStringInfo(str, " %d", x);
	appendStringInfoChar(str, ')');
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * Print the value of a Datum given its type.
 *
 * Output format is "length [ byte byte ... ]", each byte printed as a
 * signed decimal integer.
 */
void
outDatum(StringInfo str, Datum value, int typlen, bool typbyval)
{
	Size		length,
				i;
	char	   *s;

	length = datumGetSize(value, typbyval, typlen);

	if (typbyval)
	{
		/*
		 * Pass-by-value: dump the raw bytes of the Datum word itself.  Note
		 * we print sizeof(Datum) bytes even though the header says "length".
		 */
		s = (char *) (&value);
		appendStringInfo(str, "%u [ ", (unsigned int) length);
		for (i = 0; i < (Size) sizeof(Datum); i++)
			appendStringInfo(str, "%d ", (int) (s[i]));
		appendStringInfoChar(str, ']');
	}
	else
	{
		/* Pass-by-reference: dump the pointed-to bytes (none if NULL) */
		s = (char *) DatumGetPointer(value);
		if (!PointerIsValid(s))
			appendStringInfoString(str, "0 [ ]");
		else
		{
			appendStringInfo(str, "%u [ ", (unsigned int) length);
			for (i = 0; i < length; i++)
				appendStringInfo(str, "%d ", (int) (s[i]));
			appendStringInfoChar(str, ']');
		}
	}
}
|
|
|
|
|
2000-10-05 21:11:39 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
|
|
|
|
* Stuff from plannodes.h
|
|
|
|
*/
|
|
|
|
|
2007-02-20 18:32:18 +01:00
|
|
|
/*
 * Serialize a PlannedStmt, the planner's complete output for one statement.
 *
 * Field order must stay in sync with the matching read routine in
 * readfuncs.c (see the NOTES in the file header).
 */
static void
_outPlannedStmt(StringInfo str, const PlannedStmt *node)
{
	WRITE_NODE_TYPE("PLANNEDSTMT");

	WRITE_ENUM_FIELD(commandType, CmdType);
	WRITE_UINT64_FIELD(queryId);
	WRITE_BOOL_FIELD(hasReturning);
	WRITE_BOOL_FIELD(hasModifyingCTE);
	WRITE_BOOL_FIELD(canSetTag);
	WRITE_BOOL_FIELD(transientPlan);
	WRITE_BOOL_FIELD(dependsOnRole);
	WRITE_BOOL_FIELD(parallelModeNeeded);
	WRITE_INT_FIELD(jitFlags);
	WRITE_NODE_FIELD(planTree);
	WRITE_NODE_FIELD(rtable);
	WRITE_NODE_FIELD(resultRelations);
	WRITE_NODE_FIELD(rootResultRelations);
	WRITE_NODE_FIELD(subplans);
	WRITE_BITMAPSET_FIELD(rewindPlanIDs);
	WRITE_NODE_FIELD(rowMarks);
	WRITE_NODE_FIELD(relationOids);
	WRITE_NODE_FIELD(invalItems);
	WRITE_NODE_FIELD(paramExecTypes);
	WRITE_NODE_FIELD(utilityStmt);
	WRITE_LOCATION_FIELD(stmt_location);
	WRITE_LOCATION_FIELD(stmt_len);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * print the basic stuff of all nodes that inherit from Plan
 */
static void
_outPlanInfo(StringInfo str, const Plan *node)
{
	/* cost/size estimates first, then per-node flags, then subsidiary nodes */
	WRITE_FLOAT_FIELD(startup_cost, "%.2f");
	WRITE_FLOAT_FIELD(total_cost, "%.2f");
	WRITE_FLOAT_FIELD(plan_rows, "%.0f");
	WRITE_INT_FIELD(plan_width);
	WRITE_BOOL_FIELD(parallel_aware);
	WRITE_BOOL_FIELD(parallel_safe);
	WRITE_INT_FIELD(plan_node_id);
	WRITE_NODE_FIELD(targetlist);
	WRITE_NODE_FIELD(qual);
	WRITE_NODE_FIELD(lefttree);
	WRITE_NODE_FIELD(righttree);
	WRITE_NODE_FIELD(initPlan);
	WRITE_BITMAPSET_FIELD(extParam);
	WRITE_BITMAPSET_FIELD(allParam);
}
|
1998-12-15 03:24:15 +01:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * print the basic stuff of all nodes that inherit from Scan
 */
static void
_outScanInfo(StringInfo str, const Scan *node)
{
	/* emit the inherited Plan fields first */
	_outPlanInfo(str, (const Plan *) node);

	WRITE_UINT_FIELD(scanrelid);
}
|
|
|
|
|
|
|
|
/*
 * print the basic stuff of all nodes that inherit from Join
 */
static void
_outJoinPlanInfo(StringInfo str, const Join *node)
{
	/* emit the inherited Plan fields first */
	_outPlanInfo(str, (const Plan *) node);

	WRITE_ENUM_FIELD(jointype, JoinType);
	WRITE_BOOL_FIELD(inner_unique);
	WRITE_NODE_FIELD(joinqual);
}
|
|
|
|
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/* Serialize a bare Plan node (no type-specific fields) */
static void
_outPlan(StringInfo str, const Plan *node)
{
	WRITE_NODE_TYPE("PLAN");

	_outPlanInfo(str, (const Plan *) node);
}
|
|
|
|
|
|
|
|
/* Serialize a Result plan node */
static void
_outResult(StringInfo str, const Result *node)
{
	WRITE_NODE_TYPE("RESULT");

	_outPlanInfo(str, (const Plan *) node);

	WRITE_NODE_FIELD(resconstantqual);
}
|
|
|
|
|
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs_ in the targetlist (like SELECT
generate_series(1,5)) so far was done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of return values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would allow to retain the old
"least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided to instead only return rows till
all SRFs are exhausted, returning NULL for already exhausted ones. We
deemed the previous behavior to be too confusing, unexpected and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set returning
arguments to a function, is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed,
returning multiple rows from the expression, even when the SRF containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of, now superfluous, code dealing with set return expressions
around. But as the changes to get rid of those are verbose largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
2017-01-18 21:46:50 +01:00
|
|
|
/* Serialize a ProjectSet plan node (set-returning-function projection) */
static void
_outProjectSet(StringInfo str, const ProjectSet *node)
{
	WRITE_NODE_TYPE("PROJECTSET");

	/* ProjectSet adds no fields beyond the common Plan ones */
	_outPlanInfo(str, (const Plan *) node);
}
|
|
|
|
|
2009-10-10 03:43:50 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outModifyTable(StringInfo str, const ModifyTable *node)
|
2009-10-10 03:43:50 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("MODIFYTABLE");
|
|
|
|
|
2011-12-07 20:46:56 +01:00
|
|
|
_outPlanInfo(str, (const Plan *) node);
|
2009-10-10 03:43:50 +02:00
|
|
|
|
|
|
|
WRITE_ENUM_FIELD(operation, CmdType);
|
2011-02-26 00:56:23 +01:00
|
|
|
WRITE_BOOL_FIELD(canSetTag);
|
2015-02-18 00:04:11 +01:00
|
|
|
WRITE_UINT_FIELD(nominalRelation);
|
2018-10-07 20:33:17 +02:00
|
|
|
WRITE_UINT_FIELD(rootRelation);
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
WRITE_BOOL_FIELD(partColsUpdated);
|
2009-10-10 03:43:50 +02:00
|
|
|
WRITE_NODE_FIELD(resultRelations);
|
2011-02-26 00:56:23 +01:00
|
|
|
WRITE_INT_FIELD(resultRelIndex);
|
2017-05-01 14:23:01 +02:00
|
|
|
WRITE_INT_FIELD(rootResultRelIndex);
|
2009-10-10 03:43:50 +02:00
|
|
|
WRITE_NODE_FIELD(plans);
|
2013-07-18 23:10:16 +02:00
|
|
|
WRITE_NODE_FIELD(withCheckOptionLists);
|
2009-10-10 03:43:50 +02:00
|
|
|
WRITE_NODE_FIELD(returningLists);
|
2013-03-10 19:14:53 +01:00
|
|
|
WRITE_NODE_FIELD(fdwPrivLists);
|
2016-03-18 18:48:58 +01:00
|
|
|
WRITE_BITMAPSET_FIELD(fdwDirectModifyPlans);
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
WRITE_NODE_FIELD(rowMarks);
|
|
|
|
WRITE_INT_FIELD(epqParam);
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
WRITE_ENUM_FIELD(onConflictAction, OnConflictAction);
|
|
|
|
WRITE_NODE_FIELD(arbiterIndexes);
|
|
|
|
WRITE_NODE_FIELD(onConflictSet);
|
|
|
|
WRITE_NODE_FIELD(onConflictWhere);
|
2015-08-06 02:44:27 +02:00
|
|
|
WRITE_UINT_FIELD(exclRelRTI);
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
WRITE_NODE_FIELD(exclRelTlist);
|
2009-10-10 03:43:50 +02:00
|
|
|
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outAppend
 *		Emit an Append plan node: type label, shared Plan fields, then the
 *		Append-specific fields.  Field order must match the corresponding
 *		reader in readfuncs.c.
 */
static void
_outAppend(StringInfo str, const Append *node)
{
	WRITE_NODE_TYPE("APPEND");

	/* Fields common to all Plan nodes */
	_outPlanInfo(str, (const Plan *) node);

	WRITE_NODE_FIELD(appendplans);
	/* index of first partial subplan (for Parallel Append) */
	WRITE_INT_FIELD(first_partial_plan);
	WRITE_NODE_FIELD(part_prune_info);
}
|
|
|
|
|
2010-10-14 22:56:39 +02:00
|
|
|
/*
 * _outMergeAppend
 *		Emit a MergeAppend plan node.  The sort-key arrays (column indexes,
 *		operators, collations, nulls-first flags) are all numCols long.
 */
static void
_outMergeAppend(StringInfo str, const MergeAppend *node)
{
	WRITE_NODE_TYPE("MERGEAPPEND");

	/* Fields common to all Plan nodes */
	_outPlanInfo(str, (const Plan *) node);

	WRITE_NODE_FIELD(mergeplans);
	WRITE_INT_FIELD(numCols);
	WRITE_ATTRNUMBER_ARRAY(sortColIdx, node->numCols);
	WRITE_OID_ARRAY(sortOperators, node->numCols);
	WRITE_OID_ARRAY(collations, node->numCols);
	WRITE_BOOL_ARRAY(nullsFirst, node->numCols);
	WRITE_NODE_FIELD(part_prune_info);
}
|
|
|
|
|
2008-10-04 23:56:55 +02:00
|
|
|
/*
 * _outRecursiveUnion
 *		Emit a RecursiveUnion plan node.  The duplicate-detection arrays
 *		(dupColIdx, dupOperators, dupCollations) are all numCols long.
 */
static void
_outRecursiveUnion(StringInfo str, const RecursiveUnion *node)
{
	WRITE_NODE_TYPE("RECURSIVEUNION");

	/* Fields common to all Plan nodes */
	_outPlanInfo(str, (const Plan *) node);

	/* ID of the Param representing the recursive working table */
	WRITE_INT_FIELD(wtParam);
	WRITE_INT_FIELD(numCols);
	WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols);
	WRITE_OID_ARRAY(dupOperators, node->numCols);
	WRITE_OID_ARRAY(dupCollations, node->numCols);
	/* estimated number of groups, for hash table sizing */
	WRITE_LONG_FIELD(numGroups);
}
|
|
|
|
|
2005-04-20 00:35:18 +02:00
|
|
|
/*
 * _outBitmapAnd
 *		Emit a BitmapAnd plan node (ANDs the results of its sub-bitmap plans).
 */
static void
_outBitmapAnd(StringInfo str, const BitmapAnd *node)
{
	WRITE_NODE_TYPE("BITMAPAND");

	/* Fields common to all Plan nodes */
	_outPlanInfo(str, (const Plan *) node);

	WRITE_NODE_FIELD(bitmapplans);
}
|
|
|
|
|
|
|
|
/*
 * _outBitmapOr
 *		Emit a BitmapOr plan node (ORs the results of its sub-bitmap plans).
 */
static void
_outBitmapOr(StringInfo str, const BitmapOr *node)
{
	WRITE_NODE_TYPE("BITMAPOR");

	/* Fields common to all Plan nodes */
	_outPlanInfo(str, (const Plan *) node);

	/* true if the result bitmap is shared across parallel workers */
	WRITE_BOOL_FIELD(isshared);
	WRITE_NODE_FIELD(bitmapplans);
}
|
|
|
|
|
Add a Gather executor node.
A Gather executor node runs any number of copies of a plan in an equal
number of workers and merges all of the results into a single tuple
stream. It can also run the plan itself, if the workers are
unavailable or haven't started up yet. It is intended to work with
the Partial Seq Scan node which will be added in future commits.
It could also be used to implement parallel query of a different sort
by itself, without help from Partial Seq Scan, if the single_copy mode
is used. In that mode, a worker executes the plan, and the parallel
leader does not, merely collecting the worker's results. So, a Gather
node could be inserted into a plan to split the execution of that plan
across two processes. Nested Gather nodes aren't currently supported,
but we might want to add support for that in the future.
There's nothing in the planner to actually generate Gather nodes yet,
so it's not quite time to break out the champagne. But we're getting
close.
Amit Kapila. Some designs suggestions were provided by me, and I also
reviewed the patch. Single-copy mode, documentation, and other minor
changes also by me.
2015-10-01 01:23:36 +02:00
|
|
|
/*
 * _outGather
 *		Emit a Gather plan node (collects tuples from parallel workers).
 */
static void
_outGather(StringInfo str, const Gather *node)
{
	WRITE_NODE_TYPE("GATHER");

	/* Fields common to all Plan nodes */
	_outPlanInfo(str, (const Plan *) node);

	WRITE_INT_FIELD(num_workers);
	/* ID of the dummy Param signaled on rescan of this Gather */
	WRITE_INT_FIELD(rescan_param);
	WRITE_BOOL_FIELD(single_copy);
	/* if true, suppress this node in EXPLAIN output */
	WRITE_BOOL_FIELD(invisible);
	WRITE_BITMAPSET_FIELD(initParam);
}
|
|
|
|
|
2017-03-09 13:40:36 +01:00
|
|
|
/*
 * _outGatherMerge
 *		Emit a GatherMerge plan node.  Like Gather, but merge-sorts worker
 *		output; the sort-key arrays are all numCols long.
 */
static void
_outGatherMerge(StringInfo str, const GatherMerge *node)
{
	WRITE_NODE_TYPE("GATHERMERGE");

	/* Fields common to all Plan nodes */
	_outPlanInfo(str, (const Plan *) node);

	WRITE_INT_FIELD(num_workers);
	/* ID of the dummy Param signaled on rescan of this node */
	WRITE_INT_FIELD(rescan_param);
	WRITE_INT_FIELD(numCols);
	WRITE_ATTRNUMBER_ARRAY(sortColIdx, node->numCols);
	WRITE_OID_ARRAY(sortOperators, node->numCols);
	WRITE_OID_ARRAY(collations, node->numCols);
	WRITE_BOOL_ARRAY(nullsFirst, node->numCols);
	WRITE_BITMAPSET_FIELD(initParam);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outScan
 *		Emit a bare Scan plan node (no fields beyond the common scan info).
 */
static void
_outScan(StringInfo str, const Scan *node)
{
	WRITE_NODE_TYPE("SCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, node);
}
|
|
|
|
|
|
|
|
/*
 * _outSeqScan
 *		Emit a SeqScan plan node; it has no fields beyond the common
 *		scan info.
 */
static void
_outSeqScan(StringInfo str, const SeqScan *node)
{
	WRITE_NODE_TYPE("SEQSCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);
}
|
|
|
|
|
Redesign tablesample method API, and do extensive code review.
The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a TSM. (Raw inserts into system catalogs are not an acceptable
thing to do, because we can't undo them during DROP EXTENSION, nor will
pg_upgrade behave sanely.) Instead adopt an API more like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.
Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which is undesirable even if it weren't outright
unsafe), and discourage sampling methods from looking at invisible tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.
Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.
Back-patch to 9.5 so that we don't have to support the original API
in production.
2015-07-25 20:39:00 +02:00
|
|
|
/*
 * _outSampleScan
 *		Emit a SampleScan plan node (TABLESAMPLE scan).
 */
static void
_outSampleScan(StringInfo str, const SampleScan *node)
{
	WRITE_NODE_TYPE("SAMPLESCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	/* the tablesample method and its arguments */
	WRITE_NODE_FIELD(tablesample);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outIndexScan
 *		Emit an IndexScan plan node.  Both the index-adjusted quals and the
 *		original (recheck) forms are serialized.
 */
static void
_outIndexScan(StringInfo str, const IndexScan *node)
{
	WRITE_NODE_TYPE("INDEXSCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	WRITE_OID_FIELD(indexid);
	WRITE_NODE_FIELD(indexqual);
	/* original form of the quals, for EPQ rechecks */
	WRITE_NODE_FIELD(indexqualorig);
	WRITE_NODE_FIELD(indexorderby);
	WRITE_NODE_FIELD(indexorderbyorig);
	WRITE_NODE_FIELD(indexorderbyops);
	WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
}
|
|
|
|
|
|
|
|
/*
 * _outIndexOnlyScan
 *		Emit an IndexOnlyScan plan node.  indextlist describes the columns
 *		available directly from the index.
 */
static void
_outIndexOnlyScan(StringInfo str, const IndexOnlyScan *node)
{
	WRITE_NODE_TYPE("INDEXONLYSCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	WRITE_OID_FIELD(indexid);
	WRITE_NODE_FIELD(indexqual);
	WRITE_NODE_FIELD(indexorderby);
	WRITE_NODE_FIELD(indextlist);
	WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
}
|
|
|
|
|
2005-04-20 00:35:18 +02:00
|
|
|
/*
 * _outBitmapIndexScan
 *		Emit a BitmapIndexScan plan node (produces a bitmap of matching TIDs).
 */
static void
_outBitmapIndexScan(StringInfo str, const BitmapIndexScan *node)
{
	WRITE_NODE_TYPE("BITMAPINDEXSCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	WRITE_OID_FIELD(indexid);
	/* true if the result bitmap is shared across parallel workers */
	WRITE_BOOL_FIELD(isshared);
	WRITE_NODE_FIELD(indexqual);
	/* original form of the quals, for EPQ rechecks */
	WRITE_NODE_FIELD(indexqualorig);
}
|
|
|
|
|
|
|
|
/*
 * _outBitmapHeapScan
 *		Emit a BitmapHeapScan plan node (fetches heap tuples identified by
 *		a bitmap built below it).
 */
static void
_outBitmapHeapScan(StringInfo str, const BitmapHeapScan *node)
{
	WRITE_NODE_TYPE("BITMAPHEAPSCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	/* original index quals, rechecked on lossy bitmap pages */
	WRITE_NODE_FIELD(bitmapqualorig);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outTidScan
 *		Emit a TidScan plan node (scan driven by explicit CTID conditions).
 */
static void
_outTidScan(StringInfo str, const TidScan *node)
{
	WRITE_NODE_TYPE("TIDSCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	WRITE_NODE_FIELD(tidquals);
}
|
|
|
|
|
1998-02-13 04:27:47 +01:00
|
|
|
/*
 * _outSubqueryScan
 *		Emit a SubqueryScan plan node (scans the output of a sub-Plan).
 */
static void
_outSubqueryScan(StringInfo str, const SubqueryScan *node)
{
	WRITE_NODE_TYPE("SUBQUERYSCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	WRITE_NODE_FIELD(subplan);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outFunctionScan
 *		Emit a FunctionScan plan node.  "functions" is a list because
 *		ROWS FROM() allows multiple set-returning functions in one scan.
 */
static void
_outFunctionScan(StringInfo str, const FunctionScan *node)
{
	WRITE_NODE_TYPE("FUNCTIONSCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	WRITE_NODE_FIELD(functions);
	/* true if WITH ORDINALITY was specified */
	WRITE_BOOL_FIELD(funcordinality);
}
|
|
|
|
|
2017-03-08 16:39:37 +01:00
|
|
|
/*
 * _outTableFuncScan
 *		Emit a TableFuncScan plan node (e.g. XMLTABLE).
 */
static void
_outTableFuncScan(StringInfo str, const TableFuncScan *node)
{
	WRITE_NODE_TYPE("TABLEFUNCSCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	WRITE_NODE_FIELD(tablefunc);
}
|
|
|
|
|
2006-08-02 03:59:48 +02:00
|
|
|
/*
 * _outValuesScan
 *		Emit a ValuesScan plan node (scans a VALUES list).
 */
static void
_outValuesScan(StringInfo str, const ValuesScan *node)
{
	WRITE_NODE_TYPE("VALUESSCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	/* list of lists of expressions, one sublist per VALUES row */
	WRITE_NODE_FIELD(values_lists);
}
|
|
|
|
|
2008-10-04 23:56:55 +02:00
|
|
|
/*
 * _outCteScan
 *		Emit a CteScan plan node (scans the output of a WITH query).
 */
static void
_outCteScan(StringInfo str, const CteScan *node)
{
	WRITE_NODE_TYPE("CTESCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	/* ID of the init SubPlan that materializes the CTE */
	WRITE_INT_FIELD(ctePlanId);
	/* ID of the Param holding the CTE's tuplestore */
	WRITE_INT_FIELD(cteParam);
}
|
|
|
|
|
2017-04-01 06:17:18 +02:00
|
|
|
/*
 * _outNamedTuplestoreScan
 *		Emit a NamedTuplestoreScan plan node (scans a named ephemeral
 *		relation, e.g. a trigger transition table).
 */
static void
_outNamedTuplestoreScan(StringInfo str, const NamedTuplestoreScan *node)
{
	WRITE_NODE_TYPE("NAMEDTUPLESTORESCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	/* name of the ephemeral named relation */
	WRITE_STRING_FIELD(enrname);
}
|
|
|
|
|
2008-10-04 23:56:55 +02:00
|
|
|
/*
 * _outWorkTableScan
 *		Emit a WorkTableScan plan node (scans the recursive-union working
 *		table; wtParam pairs it with its RecursiveUnion).
 */
static void
_outWorkTableScan(StringInfo str, const WorkTableScan *node)
{
	WRITE_NODE_TYPE("WORKTABLESCAN");

	/* Fields common to all Scan nodes (includes Plan fields) */
	_outScanInfo(str, (const Scan *) node);

	WRITE_INT_FIELD(wtParam);
}
|
|
|
|
|
2011-02-20 06:17:18 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outForeignScan(StringInfo str, const ForeignScan *node)
|
2011-02-20 06:17:18 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("FOREIGNSCAN");
|
|
|
|
|
2011-12-07 20:46:56 +01:00
|
|
|
_outScanInfo(str, (const Scan *) node);
|
2011-02-20 06:17:18 +01:00
|
|
|
|
2016-03-18 18:48:58 +01:00
|
|
|
WRITE_ENUM_FIELD(operation, CmdType);
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
WRITE_OID_FIELD(fs_server);
|
Revise FDW planning API, again.
Further reflection shows that a single callback isn't very workable if we
desire to let FDWs generate multiple Paths, because that forces the FDW to
do all work necessary to generate a valid Plan node for each Path. Instead
split the former PlanForeignScan API into three steps: GetForeignRelSize,
GetForeignPaths, GetForeignPlan. We had already bit the bullet of breaking
the 9.1 FDW API for 9.2, so this shouldn't cause very much additional pain,
and it's substantially more flexible for complex FDWs.
Add an fdw_private field to RelOptInfo so that the new functions can save
state there rather than possibly having to recalculate information two or
three times.
In addition, we'd not thought through what would be needed to allow an FDW
to set up subexpressions of its choice for runtime execution. We could
treat ForeignScan.fdw_private as an executable expression but that seems
likely to break existing FDWs unnecessarily (in particular, it would
restrict the set of node types allowable in fdw_private to those supported
by expression_tree_walker). Instead, invent a separate field fdw_exprs
which will receive the postprocessing appropriate for expression trees.
(One field is enough since it can be a list of expressions; also, we assume
the corresponding expression state tree(s) will be held within fdw_state,
so we don't need to add anything to ForeignScanState.)
Per review of Hanada Shigeru's pgsql_fdw patch. We may need to tweak this
further as we continue to work on that patch, but to me it feels a lot
closer to being right now.
2012-03-09 18:48:48 +01:00
|
|
|
WRITE_NODE_FIELD(fdw_exprs);
|
2011-02-20 06:17:18 +01:00
|
|
|
WRITE_NODE_FIELD(fdw_private);
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
WRITE_NODE_FIELD(fdw_scan_tlist);
|
2015-10-15 19:00:40 +02:00
|
|
|
WRITE_NODE_FIELD(fdw_recheck_quals);
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
WRITE_BITMAPSET_FIELD(fs_relids);
|
Revise FDW planning API, again.
Further reflection shows that a single callback isn't very workable if we
desire to let FDWs generate multiple Paths, because that forces the FDW to
do all work necessary to generate a valid Plan node for each Path. Instead
split the former PlanForeignScan API into three steps: GetForeignRelSize,
GetForeignPaths, GetForeignPlan. We had already bit the bullet of breaking
the 9.1 FDW API for 9.2, so this shouldn't cause very much additional pain,
and it's substantially more flexible for complex FDWs.
Add an fdw_private field to RelOptInfo so that the new functions can save
state there rather than possibly having to recalculate information two or
three times.
In addition, we'd not thought through what would be needed to allow an FDW
to set up subexpressions of its choice for runtime execution. We could
treat ForeignScan.fdw_private as an executable expression but that seems
likely to break existing FDWs unnecessarily (in particular, it would
restrict the set of node types allowable in fdw_private to those supported
by expression_tree_walker). Instead, invent a separate field fdw_exprs
which will receive the postprocessing appropriate for expression trees.
(One field is enough since it can be a list of expressions; also, we assume
the corresponding expression state tree(s) will be held within fdw_state,
so we don't need to add anything to ForeignScanState.)
Per review of Hanada Shigeru's pgsql_fdw patch. We may need to tweak this
further as we continue to work on that patch, but to me it feels a lot
closer to being right now.
2012-03-09 18:48:48 +01:00
|
|
|
WRITE_BOOL_FIELD(fsSystemCol);
|
2011-02-20 06:17:18 +01:00
|
|
|
}
|
|
|
|
|
2014-11-07 23:26:02 +01:00
|
|
|
/*
 * _outCustomScan -
 *	  Serialize a CustomScan plan node.
 *
 * Emits the common Scan fields, then the CustomScan-specific fields.
 * Field order must match the corresponding read routine.
 */
static void
_outCustomScan(StringInfo str, const CustomScan *node)
{
	WRITE_NODE_TYPE("CUSTOMSCAN");

	_outScanInfo(str, (const Scan *) node);

	WRITE_UINT_FIELD(flags);
	WRITE_NODE_FIELD(custom_plans);
	WRITE_NODE_FIELD(custom_exprs);
	WRITE_NODE_FIELD(custom_private);
	WRITE_NODE_FIELD(custom_scan_tlist);
	WRITE_BITMAPSET_FIELD(custom_relids);
	/* CustomName is a key to lookup CustomScanMethods */
	appendStringInfoString(str, " :methods ");
	outToken(str, node->methods->CustomName);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outJoin -
 *	  Serialize a generic Join plan node (fields shared by all join types).
 */
static void
_outJoin(StringInfo str, const Join *node)
{
	WRITE_NODE_TYPE("JOIN");

	_outJoinPlanInfo(str, (const Join *) node);
}
|
|
|
|
|
|
|
|
/*
 * _outNestLoop -
 *	  Serialize a NestLoop plan node: shared Join fields plus the
 *	  parameters passed down to the inner side.
 */
static void
_outNestLoop(StringInfo str, const NestLoop *node)
{
	WRITE_NODE_TYPE("NESTLOOP");

	_outJoinPlanInfo(str, (const Join *) node);

	WRITE_NODE_FIELD(nestParams);
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
/*
 * _outMergeJoin -
 *	  Serialize a MergeJoin plan node.
 *
 * The per-column arrays are all parallel to mergeclauses, so their
 * length is taken from list_length(node->mergeclauses).
 */
static void
_outMergeJoin(StringInfo str, const MergeJoin *node)
{
	int			numCols;

	WRITE_NODE_TYPE("MERGEJOIN");

	_outJoinPlanInfo(str, (const Join *) node);

	WRITE_BOOL_FIELD(skip_mark_restore);
	WRITE_NODE_FIELD(mergeclauses);

	numCols = list_length(node->mergeclauses);

	WRITE_OID_ARRAY(mergeFamilies, numCols);
	WRITE_OID_ARRAY(mergeCollations, numCols);
	WRITE_INT_ARRAY(mergeStrategies, numCols);
	WRITE_BOOL_ARRAY(mergeNullsFirst, numCols);
}
|
|
|
|
|
1999-11-23 21:07:06 +01:00
|
|
|
/*
 * _outHashJoin -
 *	  Serialize a HashJoin plan node: shared Join fields plus the
 *	  hash equality clauses.
 */
static void
_outHashJoin(StringInfo str, const HashJoin *node)
{
	WRITE_NODE_TYPE("HASHJOIN");

	_outJoinPlanInfo(str, (const Join *) node);

	WRITE_NODE_FIELD(hashclauses);
}
|
|
|
|
|
2000-09-29 20:21:41 +02:00
|
|
|
/*
 * _outAgg -
 *	  Serialize an Agg plan node.
 *
 * The grouping-column arrays (grpColIdx/grpOperators/grpCollations) are
 * parallel arrays of length numCols.
 */
static void
_outAgg(StringInfo str, const Agg *node)
{
	WRITE_NODE_TYPE("AGG");

	_outPlanInfo(str, (const Plan *) node);

	WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
	WRITE_ENUM_FIELD(aggsplit, AggSplit);
	WRITE_INT_FIELD(numCols);
	WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols);
	WRITE_OID_ARRAY(grpOperators, node->numCols);
	WRITE_OID_ARRAY(grpCollations, node->numCols);
	WRITE_LONG_FIELD(numGroups);
	WRITE_BITMAPSET_FIELD(aggParams);
	WRITE_NODE_FIELD(groupingSets);
	WRITE_NODE_FIELD(chain);
}
|
|
|
|
|
2008-12-28 19:54:01 +01:00
|
|
|
/*
 * _outWindowAgg -
 *	  Serialize a WindowAgg plan node.
 *
 * Partitioning and ordering column arrays are parallel arrays of length
 * partNumCols and ordNumCols respectively.  The in_range fields describe
 * support for RANGE offset PRECEDING/FOLLOWING frame boundaries.
 */
static void
_outWindowAgg(StringInfo str, const WindowAgg *node)
{
	WRITE_NODE_TYPE("WINDOWAGG");

	_outPlanInfo(str, (const Plan *) node);

	WRITE_UINT_FIELD(winref);
	WRITE_INT_FIELD(partNumCols);
	WRITE_ATTRNUMBER_ARRAY(partColIdx, node->partNumCols);
	WRITE_OID_ARRAY(partOperators, node->partNumCols);
	WRITE_OID_ARRAY(partCollations, node->partNumCols);
	WRITE_INT_FIELD(ordNumCols);
	WRITE_ATTRNUMBER_ARRAY(ordColIdx, node->ordNumCols);
	WRITE_OID_ARRAY(ordOperators, node->ordNumCols);
	WRITE_OID_ARRAY(ordCollations, node->ordNumCols);
	WRITE_INT_FIELD(frameOptions);
	WRITE_NODE_FIELD(startOffset);
	WRITE_NODE_FIELD(endOffset);
	WRITE_OID_FIELD(startInRangeFunc);
	WRITE_OID_FIELD(endInRangeFunc);
	WRITE_OID_FIELD(inRangeColl);
	WRITE_BOOL_FIELD(inRangeAsc);
	WRITE_BOOL_FIELD(inRangeNullsFirst);
}
|
|
|
|
|
2002-05-12 22:10:05 +02:00
|
|
|
/*
 * _outGroup -
 *	  Serialize a Group plan node; the grouping-column arrays are
 *	  parallel arrays of length numCols.
 */
static void
_outGroup(StringInfo str, const Group *node)
{
	WRITE_NODE_TYPE("GROUP");

	_outPlanInfo(str, (const Plan *) node);

	WRITE_INT_FIELD(numCols);
	WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols);
	WRITE_OID_ARRAY(grpOperators, node->numCols);
	WRITE_OID_ARRAY(grpCollations, node->numCols);
}
|
|
|
|
|
2000-03-24 03:58:25 +01:00
|
|
|
/*
 * _outMaterial -
 *	  Serialize a Material plan node; it adds no fields beyond the
 *	  common Plan fields.
 */
static void
_outMaterial(StringInfo str, const Material *node)
{
	WRITE_NODE_TYPE("MATERIAL");

	_outPlanInfo(str, (const Plan *) node);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outSort -
 *	  Serialize a Sort plan node; the sort-key arrays are parallel
 *	  arrays of length numCols.
 */
static void
_outSort(StringInfo str, const Sort *node)
{
	WRITE_NODE_TYPE("SORT");

	_outPlanInfo(str, (const Plan *) node);

	WRITE_INT_FIELD(numCols);
	WRITE_ATTRNUMBER_ARRAY(sortColIdx, node->numCols);
	WRITE_OID_ARRAY(sortOperators, node->numCols);
	WRITE_OID_ARRAY(collations, node->numCols);
	WRITE_BOOL_ARRAY(nullsFirst, node->numCols);
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outUnique -
 *	  Serialize a Unique plan node; the duplicate-detection column
 *	  arrays are parallel arrays of length numCols.
 */
static void
_outUnique(StringInfo str, const Unique *node)
{
	WRITE_NODE_TYPE("UNIQUE");

	_outPlanInfo(str, (const Plan *) node);

	WRITE_INT_FIELD(numCols);
	WRITE_ATTRNUMBER_ARRAY(uniqColIdx, node->numCols);
	WRITE_OID_ARRAY(uniqOperators, node->numCols);
	WRITE_OID_ARRAY(uniqCollations, node->numCols);
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2008-09-09 20:58:09 +02:00
|
|
|
/*
 * _outHash -
 *	  Serialize a Hash plan node (the inner input of a hash join),
 *	  including the skew-optimization fields and the estimated total
 *	  row count across all parallel participants.
 */
static void
_outHash(StringInfo str, const Hash *node)
{
	WRITE_NODE_TYPE("HASH");

	_outPlanInfo(str, (const Plan *) node);

	WRITE_OID_FIELD(skewTable);
	WRITE_INT_FIELD(skewColumn);
	WRITE_BOOL_FIELD(skewInherit);
	WRITE_FLOAT_FIELD(rows_total, "%.0f");
}
|
|
|
|
|
2000-10-05 21:11:39 +02:00
|
|
|
/*
 * _outSetOp -
 *	  Serialize a SetOp plan node (INTERSECT/EXCEPT implementation);
 *	  the duplicate-check column arrays are parallel arrays of length
 *	  numCols.
 */
static void
_outSetOp(StringInfo str, const SetOp *node)
{
	WRITE_NODE_TYPE("SETOP");

	_outPlanInfo(str, (const Plan *) node);

	WRITE_ENUM_FIELD(cmd, SetOpCmd);
	WRITE_ENUM_FIELD(strategy, SetOpStrategy);
	WRITE_INT_FIELD(numCols);
	WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols);
	WRITE_OID_ARRAY(dupOperators, node->numCols);
	WRITE_OID_ARRAY(dupCollations, node->numCols);
	WRITE_INT_FIELD(flagColIdx);
	WRITE_INT_FIELD(firstFlag);
	WRITE_LONG_FIELD(numGroups);
}
|
|
|
|
|
2009-10-12 20:10:51 +02:00
|
|
|
/*
 * _outLockRows -
 *	  Serialize a LockRows plan node (FOR UPDATE/SHARE row locking),
 *	  including the EvalPlanQual runtime Param number.
 */
static void
_outLockRows(StringInfo str, const LockRows *node)
{
	WRITE_NODE_TYPE("LOCKROWS");

	_outPlanInfo(str, (const Plan *) node);

	WRITE_NODE_FIELD(rowMarks);
	WRITE_INT_FIELD(epqParam);
}
|
|
|
|
|
2000-10-26 23:38:24 +02:00
|
|
|
/*
 * _outLimit -
 *	  Serialize a Limit plan node; offset and count are expression
 *	  subtrees (or NULL when absent).
 */
static void
_outLimit(StringInfo str, const Limit *node)
{
	WRITE_NODE_TYPE("LIMIT");

	_outPlanInfo(str, (const Plan *) node);

	WRITE_NODE_FIELD(limitOffset);
	WRITE_NODE_FIELD(limitCount);
}
|
|
|
|
|
2010-07-12 19:01:06 +02:00
|
|
|
/*
 * _outNestLoopParam -
 *	  Serialize a NestLoopParam node (a parameter passed from the outer
 *	  to the inner side of a nestloop join).
 */
static void
_outNestLoopParam(StringInfo str, const NestLoopParam *node)
{
	WRITE_NODE_TYPE("NESTLOOPPARAM");

	WRITE_INT_FIELD(paramno);
	WRITE_NODE_FIELD(paramval);
}
|
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/*
 * _outPlanRowMark
 *		Output function for PlanRowMark nodes.
 *
 * Field order must match the reader in readfuncs.c.  Note that waitPolicy
 * is an enum (LockWaitPolicy), not a bool, and allMarkTypes covers the
 * union of mark types used across a parent table's children.
 */
static void
_outPlanRowMark(StringInfo str, const PlanRowMark *node)
{
	WRITE_NODE_TYPE("PLANROWMARK");

	WRITE_UINT_FIELD(rti);
	WRITE_UINT_FIELD(prti);
	WRITE_UINT_FIELD(rowmarkId);
	WRITE_ENUM_FIELD(markType, RowMarkType);
	WRITE_INT_FIELD(allMarkTypes);
	WRITE_ENUM_FIELD(strength, LockClauseStrength);
	WRITE_ENUM_FIELD(waitPolicy, LockWaitPolicy);
	WRITE_BOOL_FIELD(isParent);
}
|
|
|
|
|
2018-06-10 22:30:14 +02:00
|
|
|
/*
 * _outPartitionPruneInfo
 *		Output function for PartitionPruneInfo nodes.
 *
 * Field order must match the reader in readfuncs.c.
 */
static void
_outPartitionPruneInfo(StringInfo str, const PartitionPruneInfo *node)
{
	WRITE_NODE_TYPE("PARTITIONPRUNEINFO");

	WRITE_NODE_FIELD(prune_infos);
	WRITE_BITMAPSET_FIELD(other_subplans);
}
|
|
|
|
|
|
|
|
/*
 * _outPartitionedRelPruneInfo
 *		Output function for PartitionedRelPruneInfo nodes.
 *
 * Field order must match the reader in readfuncs.c.  The three arrays are
 * all indexed by partition number, so each is written with length
 * node->nparts; nparts itself must therefore be emitted before them so the
 * reader knows how many elements to consume.
 */
static void
_outPartitionedRelPruneInfo(StringInfo str, const PartitionedRelPruneInfo *node)
{
	WRITE_NODE_TYPE("PARTITIONEDRELPRUNEINFO");

	WRITE_UINT_FIELD(rtindex);
	WRITE_BITMAPSET_FIELD(present_parts);
	WRITE_INT_FIELD(nparts);
	WRITE_INT_ARRAY(subplan_map, node->nparts);
	WRITE_INT_ARRAY(subpart_map, node->nparts);
	WRITE_OID_ARRAY(relid_map, node->nparts);
	WRITE_NODE_FIELD(initial_pruning_steps);
	WRITE_NODE_FIELD(exec_pruning_steps);
	WRITE_BITMAPSET_FIELD(execparamids);
}
|
|
|
|
|
|
|
|
/*
 * _outPartitionPruneStepOp
 *		Output function for PartitionPruneStepOp nodes.
 *
 * Field order must match the reader in readfuncs.c.  step.step_id is the
 * embedded PartitionPruneStep header's id, written with an explicit
 * member path since WRITE_INT_FIELD takes the field expression verbatim.
 */
static void
_outPartitionPruneStepOp(StringInfo str, const PartitionPruneStepOp *node)
{
	WRITE_NODE_TYPE("PARTITIONPRUNESTEPOP");

	WRITE_INT_FIELD(step.step_id);
	WRITE_INT_FIELD(opstrategy);
	WRITE_NODE_FIELD(exprs);
	WRITE_NODE_FIELD(cmpfns);
	WRITE_BITMAPSET_FIELD(nullkeys);
}
|
|
|
|
|
|
|
|
/*
 * _outPartitionPruneStepCombine
 *		Output function for PartitionPruneStepCombine nodes.
 *
 * Field order must match the reader in readfuncs.c.
 */
static void
_outPartitionPruneStepCombine(StringInfo str, const PartitionPruneStepCombine *node)
{
	WRITE_NODE_TYPE("PARTITIONPRUNESTEPCOMBINE");

	WRITE_INT_FIELD(step.step_id);
	WRITE_ENUM_FIELD(combineOp, PartitionPruneCombineOp);
	WRITE_NODE_FIELD(source_stepids);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outPlanInvalItem
 *		Output function for PlanInvalItem nodes.
 *
 * Field order must match the reader in readfuncs.c.  hashValue is written
 * as unsigned to round-trip the full 32-bit catalog-cache hash value.
 */
static void
_outPlanInvalItem(StringInfo str, const PlanInvalItem *node)
{
	WRITE_NODE_TYPE("PLANINVALITEM");

	WRITE_INT_FIELD(cacheId);
	WRITE_UINT_FIELD(hashValue);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* Stuff from primnodes.h.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/*
 * _outAlias
 *		Output function for Alias nodes.
 *
 * Field order must match the reader in readfuncs.c.
 */
static void
_outAlias(StringInfo str, const Alias *node)
{
	WRITE_NODE_TYPE("ALIAS");

	WRITE_STRING_FIELD(aliasname);
	WRITE_NODE_FIELD(colnames);
}
|
|
|
|
|
|
|
|
/*
 * _outRangeVar
 *		Output function for RangeVar nodes.
 *
 * Field order must match the reader in readfuncs.c.
 */
static void
_outRangeVar(StringInfo str, const RangeVar *node)
{
	WRITE_NODE_TYPE("RANGEVAR");

	/*
	 * we deliberately ignore catalogname here, since it is presently not
	 * semantically meaningful
	 */
	WRITE_STRING_FIELD(schemaname);
	WRITE_STRING_FIELD(relname);
	WRITE_BOOL_FIELD(inh);
	WRITE_CHAR_FIELD(relpersistence);
	WRITE_NODE_FIELD(alias);
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
2017-03-08 16:39:37 +01:00
|
|
|
/*
 * _outTableFunc
 *		Output function for TableFunc nodes.
 *
 * Field order must match the reader in readfuncs.c.  The parallel
 * per-column lists (colnames/coltypes/coltypmods/colcollations/colexprs/
 * coldefexprs) are each emitted as ordinary Node fields.
 */
static void
_outTableFunc(StringInfo str, const TableFunc *node)
{
	WRITE_NODE_TYPE("TABLEFUNC");

	WRITE_NODE_FIELD(ns_uris);
	WRITE_NODE_FIELD(ns_names);
	WRITE_NODE_FIELD(docexpr);
	WRITE_NODE_FIELD(rowexpr);
	WRITE_NODE_FIELD(colnames);
	WRITE_NODE_FIELD(coltypes);
	WRITE_NODE_FIELD(coltypmods);
	WRITE_NODE_FIELD(colcollations);
	WRITE_NODE_FIELD(colexprs);
	WRITE_NODE_FIELD(coldefexprs);
	WRITE_BITMAPSET_FIELD(notnulls);
	WRITE_INT_FIELD(ordinalitycol);
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
2007-02-20 18:32:18 +01:00
|
|
|
/*
 * _outIntoClause
 *		Output function for IntoClause nodes.
 *
 * Field order must match the reader in readfuncs.c.  viewQuery is the
 * parse-analysis-supplied query stored for CREATE MATERIALIZED VIEW.
 */
static void
_outIntoClause(StringInfo str, const IntoClause *node)
{
	WRITE_NODE_TYPE("INTOCLAUSE");

	WRITE_NODE_FIELD(rel);
	WRITE_NODE_FIELD(colNames);
	WRITE_STRING_FIELD(accessMethod);
	WRITE_NODE_FIELD(options);
	WRITE_ENUM_FIELD(onCommit, OnCommitAction);
	WRITE_STRING_FIELD(tableSpaceName);
	WRITE_NODE_FIELD(viewQuery);
	WRITE_BOOL_FIELD(skipData);
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * _outVar
 *		Output function for Var nodes.
 *
 * Field order must match the reader in readfuncs.c.  varnoold/varoattno
 * preserve the pre-planning identity of the variable alongside the
 * current varno/varattno.
 */
static void
_outVar(StringInfo str, const Var *node)
{
	WRITE_NODE_TYPE("VAR");

	WRITE_UINT_FIELD(varno);
	WRITE_INT_FIELD(varattno);
	WRITE_OID_FIELD(vartype);
	WRITE_INT_FIELD(vartypmod);
	WRITE_OID_FIELD(varcollid);
	WRITE_UINT_FIELD(varlevelsup);
	WRITE_UINT_FIELD(varnoold);
	WRITE_INT_FIELD(varoattno);
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outConst(StringInfo str, const Const *node)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_NODE_TYPE("CONST");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(consttype);
|
2007-03-17 01:11:05 +01:00
|
|
|
WRITE_INT_FIELD(consttypmod);
|
2011-02-08 22:04:18 +01:00
|
|
|
WRITE_OID_FIELD(constcollid);
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_INT_FIELD(constlen);
|
|
|
|
WRITE_BOOL_FIELD(constbyval);
|
|
|
|
WRITE_BOOL_FIELD(constisnull);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
1998-12-15 05:00:46 +01:00
|
|
|
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, " :constvalue ");
|
1997-09-07 07:04:48 +02:00
|
|
|
if (node->constisnull)
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "<>");
|
1997-09-07 07:04:48 +02:00
|
|
|
else
|
2016-04-08 23:26:36 +02:00
|
|
|
outDatum(str, node->constvalue, node->constlen, node->constbyval);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * _outParam
 *		Output function for Param nodes.
 *
 * Field order must match the reader in readfuncs.c.
 */
static void
_outParam(StringInfo str, const Param *node)
{
	WRITE_NODE_TYPE("PARAM");

	WRITE_ENUM_FIELD(paramkind, ParamKind);
	WRITE_INT_FIELD(paramid);
	WRITE_OID_FIELD(paramtype);
	WRITE_INT_FIELD(paramtypmod);
	WRITE_OID_FIELD(paramcollid);
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outAggref
 *		Output function for Aggref nodes.
 *
 * Field order must match the reader in readfuncs.c.  aggargtypes records
 * the user-level argument type OIDs separately from args, since under
 * partial aggregation the args list can be replaced by a Var of the
 * transition (or serialized) type; aggtranstype is the planner-resolved
 * transition type.
 */
static void
_outAggref(StringInfo str, const Aggref *node)
{
	WRITE_NODE_TYPE("AGGREF");

	WRITE_OID_FIELD(aggfnoid);
	WRITE_OID_FIELD(aggtype);
	WRITE_OID_FIELD(aggcollid);
	WRITE_OID_FIELD(inputcollid);
	WRITE_OID_FIELD(aggtranstype);
	WRITE_NODE_FIELD(aggargtypes);
	WRITE_NODE_FIELD(aggdirectargs);
	WRITE_NODE_FIELD(args);
	WRITE_NODE_FIELD(aggorder);
	WRITE_NODE_FIELD(aggdistinct);
	WRITE_NODE_FIELD(aggfilter);
	WRITE_BOOL_FIELD(aggstar);
	WRITE_BOOL_FIELD(aggvariadic);
	WRITE_CHAR_FIELD(aggkind);
	WRITE_UINT_FIELD(agglevelsup);
	WRITE_ENUM_FIELD(aggsplit, AggSplit);
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in an earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
static void
|
|
|
|
_outGroupingFunc(StringInfo str, const GroupingFunc *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("GROUPINGFUNC");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(args);
|
|
|
|
WRITE_NODE_FIELD(refs);
|
|
|
|
WRITE_NODE_FIELD(cols);
|
2015-08-06 02:44:27 +02:00
|
|
|
WRITE_UINT_FIELD(agglevelsup);
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in an earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
2008-12-28 19:54:01 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outWindowFunc(StringInfo str, const WindowFunc *node)
|
2008-12-28 19:54:01 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("WINDOWFUNC");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(winfnoid);
|
|
|
|
WRITE_OID_FIELD(wintype);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(wincollid);
|
|
|
|
WRITE_OID_FIELD(inputcollid);
|
2008-12-28 19:54:01 +01:00
|
|
|
WRITE_NODE_FIELD(args);
|
2013-07-17 02:15:36 +02:00
|
|
|
WRITE_NODE_FIELD(aggfilter);
|
2008-12-28 19:54:01 +01:00
|
|
|
WRITE_UINT_FIELD(winref);
|
|
|
|
WRITE_BOOL_FIELD(winstar);
|
|
|
|
WRITE_BOOL_FIELD(winagg);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
static void
|
2019-02-01 16:50:32 +01:00
|
|
|
_outSubscriptingRef(StringInfo str, const SubscriptingRef *node)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2019-02-01 16:50:32 +01:00
|
|
|
WRITE_NODE_TYPE("SUBSCRIPTINGREF");
|
1998-12-15 05:00:46 +01:00
|
|
|
|
2019-02-01 16:50:32 +01:00
|
|
|
WRITE_OID_FIELD(refcontainertype);
|
2003-04-09 01:20:04 +02:00
|
|
|
WRITE_OID_FIELD(refelemtype);
|
2007-03-17 01:11:05 +01:00
|
|
|
WRITE_INT_FIELD(reftypmod);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(refcollid);
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_NODE_FIELD(refupperindexpr);
|
|
|
|
WRITE_NODE_FIELD(reflowerindexpr);
|
|
|
|
WRITE_NODE_FIELD(refexpr);
|
|
|
|
WRITE_NODE_FIELD(refassgnexpr);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outFuncExpr(StringInfo str, const FuncExpr *node)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_TYPE("FUNCEXPR");
|
2002-11-25 19:12:12 +01:00
|
|
|
|
|
|
|
WRITE_OID_FIELD(funcid);
|
|
|
|
WRITE_OID_FIELD(funcresulttype);
|
|
|
|
WRITE_BOOL_FIELD(funcretset);
|
2013-01-22 02:25:26 +01:00
|
|
|
WRITE_BOOL_FIELD(funcvariadic);
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_ENUM_FIELD(funcformat, CoercionForm);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(funccollid);
|
|
|
|
WRITE_OID_FIELD(inputcollid);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(args);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
2009-10-08 04:39:25 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outNamedArgExpr(StringInfo str, const NamedArgExpr *node)
|
2009-10-08 04:39:25 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("NAMEDARGEXPR");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_STRING_FIELD(name);
|
|
|
|
WRITE_INT_FIELD(argnumber);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outOpExpr(StringInfo str, const OpExpr *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("OPEXPR");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(opno);
|
|
|
|
WRITE_OID_FIELD(opfuncid);
|
|
|
|
WRITE_OID_FIELD(opresulttype);
|
|
|
|
WRITE_BOOL_FIELD(opretset);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(opcollid);
|
|
|
|
WRITE_OID_FIELD(inputcollid);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(args);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outDistinctExpr(StringInfo str, const DistinctExpr *node)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_TYPE("DISTINCTEXPR");
|
2002-11-25 19:12:12 +01:00
|
|
|
|
|
|
|
WRITE_OID_FIELD(opno);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_OID_FIELD(opfuncid);
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_OID_FIELD(opresulttype);
|
|
|
|
WRITE_BOOL_FIELD(opretset);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(opcollid);
|
|
|
|
WRITE_OID_FIELD(inputcollid);
|
|
|
|
WRITE_NODE_FIELD(args);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outNullIfExpr(StringInfo str, const NullIfExpr *node)
|
2011-03-20 01:29:08 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("NULLIFEXPR");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(opno);
|
|
|
|
WRITE_OID_FIELD(opfuncid);
|
|
|
|
WRITE_OID_FIELD(opresulttype);
|
|
|
|
WRITE_BOOL_FIELD(opretset);
|
|
|
|
WRITE_OID_FIELD(opcollid);
|
|
|
|
WRITE_OID_FIELD(inputcollid);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(args);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2003-06-29 02:33:44 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outScalarArrayOpExpr(StringInfo str, const ScalarArrayOpExpr *node)
|
2003-06-29 02:33:44 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("SCALARARRAYOPEXPR");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(opno);
|
|
|
|
WRITE_OID_FIELD(opfuncid);
|
|
|
|
WRITE_BOOL_FIELD(useOr);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(inputcollid);
|
2003-06-29 02:33:44 +02:00
|
|
|
WRITE_NODE_FIELD(args);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2003-06-29 02:33:44 +02:00
|
|
|
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outBoolExpr(StringInfo str, const BoolExpr *node)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2002-12-12 16:49:42 +01:00
|
|
|
char *opstr = NULL;
|
2002-11-25 19:12:12 +01:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_TYPE("BOOLEXPR");
|
|
|
|
|
|
|
|
/* do-it-yourself enum representation */
|
|
|
|
switch (node->boolop)
|
|
|
|
{
|
|
|
|
case AND_EXPR:
|
|
|
|
opstr = "and";
|
|
|
|
break;
|
|
|
|
case OR_EXPR:
|
|
|
|
opstr = "or";
|
|
|
|
break;
|
|
|
|
case NOT_EXPR:
|
|
|
|
opstr = "not";
|
|
|
|
break;
|
|
|
|
}
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, " :boolop ");
|
2016-09-16 15:36:19 +02:00
|
|
|
outToken(str, opstr);
|
2002-12-12 16:49:42 +01:00
|
|
|
|
|
|
|
WRITE_NODE_FIELD(args);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outSubLink(StringInfo str, const SubLink *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("SUBLINK");
|
|
|
|
|
|
|
|
WRITE_ENUM_FIELD(subLinkType, SubLinkType);
|
Implement UPDATE tab SET (col1,col2,...) = (SELECT ...), ...
This SQL-standard feature allows a sub-SELECT yielding multiple columns
(but only one row) to be used to compute the new values of several columns
to be updated. While the same results can be had with an independent
sub-SELECT per column, such a workaround can require a great deal of
duplicated computation.
The standard actually says that the source for a multi-column assignment
could be any row-valued expression. The implementation used here is
tightly tied to our existing sub-SELECT support and can't handle other
cases; the Bison grammar would have some issues with them too. However,
I don't feel too bad about this since other cases can be converted into
sub-SELECTs. For instance, "SET (a,b,c) = row_valued_function(x)" could
be written "SET (a,b,c) = (SELECT * FROM row_valued_function(x))".
2014-06-18 19:22:25 +02:00
|
|
|
WRITE_INT_FIELD(subLinkId);
|
2005-12-28 02:30:02 +01:00
|
|
|
WRITE_NODE_FIELD(testexpr);
|
2003-01-10 22:08:15 +01:00
|
|
|
WRITE_NODE_FIELD(operName);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(subselect);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outSubPlan(StringInfo str, const SubPlan *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
2002-12-14 01:17:59 +01:00
|
|
|
WRITE_NODE_TYPE("SUBPLAN");
|
2002-12-12 16:49:42 +01:00
|
|
|
|
2002-12-14 01:17:59 +01:00
|
|
|
WRITE_ENUM_FIELD(subLinkType, SubLinkType);
|
2005-12-28 02:30:02 +01:00
|
|
|
WRITE_NODE_FIELD(testexpr);
|
2004-05-26 06:41:50 +02:00
|
|
|
WRITE_NODE_FIELD(paramIds);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_INT_FIELD(plan_id);
|
2009-04-05 21:59:40 +02:00
|
|
|
WRITE_STRING_FIELD(plan_name);
|
2007-02-22 23:00:26 +01:00
|
|
|
WRITE_OID_FIELD(firstColType);
|
2009-03-10 23:09:26 +01:00
|
|
|
WRITE_INT_FIELD(firstColTypmod);
|
2011-02-08 22:04:18 +01:00
|
|
|
WRITE_OID_FIELD(firstColCollation);
|
2003-01-10 22:08:15 +01:00
|
|
|
WRITE_BOOL_FIELD(useHashTable);
|
|
|
|
WRITE_BOOL_FIELD(unknownEqFalse);
|
2017-02-15 00:09:47 +01:00
|
|
|
WRITE_BOOL_FIELD(parallel_safe);
|
2004-05-26 06:41:50 +02:00
|
|
|
WRITE_NODE_FIELD(setParam);
|
|
|
|
WRITE_NODE_FIELD(parParam);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(args);
|
2008-08-22 02:16:04 +02:00
|
|
|
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
|
|
|
|
WRITE_FLOAT_FIELD(per_call_cost, "%.2f");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outAlternativeSubPlan(StringInfo str, const AlternativeSubPlan *node)
|
2008-08-22 02:16:04 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("ALTERNATIVESUBPLAN");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subplans);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outFieldSelect(StringInfo str, const FieldSelect *node)
|
2000-09-12 23:07:18 +02:00
|
|
|
{
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_NODE_TYPE("FIELDSELECT");
|
2000-09-12 23:07:18 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_INT_FIELD(fieldnum);
|
|
|
|
WRITE_OID_FIELD(resulttype);
|
|
|
|
WRITE_INT_FIELD(resulttypmod);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(resultcollid);
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
|
2004-06-09 21:08:20 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outFieldStore(StringInfo str, const FieldStore *node)
|
2004-06-09 21:08:20 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("FIELDSTORE");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_NODE_FIELD(newvals);
|
|
|
|
WRITE_NODE_FIELD(fieldnums);
|
|
|
|
WRITE_OID_FIELD(resulttype);
|
|
|
|
}
|
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outRelabelType(StringInfo str, const RelabelType *node)
|
2000-09-12 23:07:18 +02:00
|
|
|
{
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_NODE_TYPE("RELABELTYPE");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_OID_FIELD(resulttype);
|
|
|
|
WRITE_INT_FIELD(resulttypmod);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(resultcollid);
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_ENUM_FIELD(relabelformat, CoercionForm);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
|
2007-06-05 23:31:09 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outCoerceViaIO(StringInfo str, const CoerceViaIO *node)
|
2007-06-05 23:31:09 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("COERCEVIAIO");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_OID_FIELD(resulttype);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(resultcollid);
|
2007-06-05 23:31:09 +02:00
|
|
|
WRITE_ENUM_FIELD(coerceformat, CoercionForm);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2007-06-05 23:31:09 +02:00
|
|
|
}
|
|
|
|
|
2007-03-28 01:21:12 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outArrayCoerceExpr(StringInfo str, const ArrayCoerceExpr *node)
|
2007-03-28 01:21:12 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("ARRAYCOERCEEXPR");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
Support arrays over domains.
Allowing arrays with a domain type as their element type was left un-done
in the original domain patch, but not for any very good reason. This
omission leads to such surprising results as array_agg() not working on
a domain column, because the parser can't identify a suitable output type
for the polymorphic aggregate.
In order to fix this, first clean up the APIs of coerce_to_domain() and
some internal functions in parse_coerce.c so that we consistently pass
around a CoercionContext along with CoercionForm. Previously, we sometimes
passed an "isExplicit" boolean flag instead, which is strictly less
information; and coerce_to_domain() didn't even get that, but instead had
to reverse-engineer isExplicit from CoercionForm. That's contrary to the
documentation in primnodes.h that says that CoercionForm only affects
display and not semantics. I don't think this change fixes any live bugs,
but it makes things more consistent. The main reason for doing it though
is that now build_coercion_expression() receives ccontext, which it needs
in order to be able to recursively invoke coerce_to_target_type().
Next, reimplement ArrayCoerceExpr so that the node does not directly know
any details of what has to be done to the individual array elements while
performing the array coercion. Instead, the per-element processing is
represented by a sub-expression whose input is a source array element and
whose output is a target array element. This simplifies life in
parse_coerce.c, because it can build that sub-expression by a recursive
invocation of coerce_to_target_type(). The executor now handles the
per-element processing as a compiled expression instead of hard-wired code.
The main advantage of this is that we can use a single ArrayCoerceExpr to
handle as many as three successive steps per element: base type conversion,
typmod coercion, and domain constraint checking. The old code used two
stacked ArrayCoerceExprs to handle type + typmod coercion, which was pretty
inefficient, and adding yet another array deconstruction to do domain
constraint checking seemed very unappetizing.
In the case where we just need a single, very simple coercion function,
doing this straightforwardly leads to a noticeable increase in the
per-array-element runtime cost. Hence, add an additional shortcut evalfunc
in execExprInterp.c that skips unnecessary overhead for that specific form
of expression. The runtime speed of simple cases is within 1% or so of
where it was before, while cases that previously required two levels of
array processing are significantly faster.
Finally, create an implicit array type for every domain type, as we do for
base types, enums, etc. Everything except the array-coercion case seems
to just work without further effort.
Tom Lane, reviewed by Andrew Dunstan
Discussion: https://postgr.es/m/9852.1499791473@sss.pgh.pa.us
2017-09-30 19:40:56 +02:00
|
|
|
WRITE_NODE_FIELD(elemexpr);
|
2007-03-28 01:21:12 +02:00
|
|
|
WRITE_OID_FIELD(resulttype);
|
|
|
|
WRITE_INT_FIELD(resulttypmod);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(resultcollid);
|
2007-03-28 01:21:12 +02:00
|
|
|
WRITE_ENUM_FIELD(coerceformat, CoercionForm);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2007-03-28 01:21:12 +02:00
|
|
|
}
|
|
|
|
|
2004-12-12 00:26:51 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outConvertRowtypeExpr(StringInfo str, const ConvertRowtypeExpr *node)
|
2004-12-12 00:26:51 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("CONVERTROWTYPEEXPR");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_OID_FIELD(resulttype);
|
|
|
|
WRITE_ENUM_FIELD(convertformat, CoercionForm);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2004-12-12 00:26:51 +01:00
|
|
|
}
|
|
|
|
|
2011-03-11 22:27:51 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outCollateExpr(StringInfo str, const CollateExpr *node)
|
2011-03-11 22:27:51 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("COLLATE");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_OID_FIELD(collOid);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outCaseExpr(StringInfo str, const CaseExpr *node)
|
2000-09-12 23:07:18 +02:00
|
|
|
{
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_TYPE("CASE");
|
2002-11-25 19:12:12 +01:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_OID_FIELD(casetype);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(casecollid);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_NODE_FIELD(args);
|
|
|
|
WRITE_NODE_FIELD(defresult);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
|
2000-09-29 20:21:41 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outCaseWhen(StringInfo str, const CaseWhen *node)
|
2000-09-29 20:21:41 +02:00
|
|
|
{
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_TYPE("WHEN");
|
2002-11-25 19:12:12 +01:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(expr);
|
|
|
|
WRITE_NODE_FIELD(result);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2000-09-29 20:21:41 +02:00
|
|
|
}
|
|
|
|
|
2004-03-17 21:48:43 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outCaseTestExpr(StringInfo str, const CaseTestExpr *node)
|
2004-03-17 21:48:43 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("CASETESTEXPR");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(typeId);
|
|
|
|
WRITE_INT_FIELD(typeMod);
|
2011-02-08 22:04:18 +01:00
|
|
|
WRITE_OID_FIELD(collation);
|
2004-03-17 21:48:43 +01:00
|
|
|
}
|
|
|
|
|
2003-04-09 01:20:04 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outArrayExpr(StringInfo str, const ArrayExpr *node)
|
2003-04-09 01:20:04 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("ARRAY");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(array_typeid);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(array_collid);
|
2003-04-09 01:20:04 +02:00
|
|
|
WRITE_OID_FIELD(element_typeid);
|
|
|
|
WRITE_NODE_FIELD(elements);
|
2003-08-18 01:43:27 +02:00
|
|
|
WRITE_BOOL_FIELD(multidims);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2003-04-09 01:20:04 +02:00
|
|
|
}
|
|
|
|
|
2004-05-11 00:44:49 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outRowExpr(StringInfo str, const RowExpr *node)
|
2004-05-11 00:44:49 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("ROW");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(args);
|
|
|
|
WRITE_OID_FIELD(row_typeid);
|
|
|
|
WRITE_ENUM_FIELD(row_format, CoercionForm);
|
2008-10-06 19:39:26 +02:00
|
|
|
WRITE_NODE_FIELD(colnames);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2004-05-11 00:44:49 +02:00
|
|
|
}
|
|
|
|
|
2005-12-28 02:30:02 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outRowCompareExpr(StringInfo str, const RowCompareExpr *node)
|
2005-12-28 02:30:02 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("ROWCOMPARE");
|
|
|
|
|
|
|
|
WRITE_ENUM_FIELD(rctype, RowCompareType);
|
|
|
|
WRITE_NODE_FIELD(opnos);
|
2006-12-23 01:43:13 +01:00
|
|
|
WRITE_NODE_FIELD(opfamilies);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_NODE_FIELD(inputcollids);
|
2005-12-28 02:30:02 +01:00
|
|
|
WRITE_NODE_FIELD(largs);
|
|
|
|
WRITE_NODE_FIELD(rargs);
|
|
|
|
}
|
|
|
|
|
2003-02-16 03:30:39 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outCoalesceExpr(StringInfo str, const CoalesceExpr *node)
|
2003-02-16 03:30:39 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("COALESCE");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(coalescetype);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(coalescecollid);
|
2003-02-16 03:30:39 +01:00
|
|
|
WRITE_NODE_FIELD(args);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2003-02-16 03:30:39 +01:00
|
|
|
}
|
|
|
|
|
2005-06-27 00:05:42 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outMinMaxExpr(StringInfo str, const MinMaxExpr *node)
|
2005-06-27 00:05:42 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("MINMAX");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(minmaxtype);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(minmaxcollid);
|
|
|
|
WRITE_OID_FIELD(inputcollid);
|
2005-06-27 00:05:42 +02:00
|
|
|
WRITE_ENUM_FIELD(op, MinMaxOp);
|
|
|
|
WRITE_NODE_FIELD(args);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2005-06-27 00:05:42 +02:00
|
|
|
}
|
|
|
|
|
2016-08-17 02:33:01 +02:00
|
|
|
static void
|
|
|
|
_outSQLValueFunction(StringInfo str, const SQLValueFunction *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("SQLVALUEFUNCTION");
|
|
|
|
|
|
|
|
WRITE_ENUM_FIELD(op, SQLValueFunctionOp);
|
|
|
|
WRITE_OID_FIELD(type);
|
|
|
|
WRITE_INT_FIELD(typmod);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
2006-12-24 01:29:20 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outXmlExpr(StringInfo str, const XmlExpr *node)
|
2006-12-24 01:29:20 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("XMLEXPR");
|
2007-11-15 22:14:46 +01:00
|
|
|
|
2006-12-24 01:29:20 +01:00
|
|
|
WRITE_ENUM_FIELD(op, XmlExprOp);
|
|
|
|
WRITE_STRING_FIELD(name);
|
|
|
|
WRITE_NODE_FIELD(named_args);
|
|
|
|
WRITE_NODE_FIELD(arg_names);
|
|
|
|
WRITE_NODE_FIELD(args);
|
2007-02-03 15:06:56 +01:00
|
|
|
WRITE_ENUM_FIELD(xmloption, XmlOptionType);
|
|
|
|
WRITE_OID_FIELD(type);
|
|
|
|
WRITE_INT_FIELD(typmod);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2006-12-24 01:29:20 +01:00
|
|
|
}
|
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outNullTest(StringInfo str, const NullTest *node)
|
2000-09-12 23:07:18 +02:00
|
|
|
{
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_TYPE("NULLTEST");
|
2002-11-25 19:12:12 +01:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_ENUM_FIELD(nulltesttype, NullTestType);
|
2010-01-02 00:03:10 +01:00
|
|
|
WRITE_BOOL_FIELD(argisrow);
|
2015-02-22 20:40:27 +01:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outBooleanTest(StringInfo str, const BooleanTest *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("BOOLEANTEST");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_ENUM_FIELD(booltesttype, BoolTestType);
|
2015-02-22 20:40:27 +01:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outCoerceToDomain(StringInfo str, const CoerceToDomain *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
2003-02-03 22:15:45 +01:00
|
|
|
WRITE_NODE_TYPE("COERCETODOMAIN");
|
2002-12-12 16:49:42 +01:00
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
2003-02-03 22:15:45 +01:00
|
|
|
WRITE_OID_FIELD(resulttype);
|
|
|
|
WRITE_INT_FIELD(resulttypmod);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(resultcollid);
|
2003-02-03 22:15:45 +01:00
|
|
|
WRITE_ENUM_FIELD(coercionformat, CoercionForm);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outCoerceToDomainValue(StringInfo str, const CoerceToDomainValue *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
2003-02-03 22:15:45 +01:00
|
|
|
WRITE_NODE_TYPE("COERCETODOMAINVALUE");
|
2002-12-12 16:49:42 +01:00
|
|
|
|
|
|
|
WRITE_OID_FIELD(typeId);
|
|
|
|
WRITE_INT_FIELD(typeMod);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(collation);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
|
2003-07-03 18:34:26 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outSetToDefault(StringInfo str, const SetToDefault *node)
|
2003-07-03 18:34:26 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("SETTODEFAULT");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(typeId);
|
|
|
|
WRITE_INT_FIELD(typeMod);
|
2011-03-20 01:29:08 +01:00
|
|
|
WRITE_OID_FIELD(collation);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2003-07-03 18:34:26 +02:00
|
|
|
}
|
|
|
|
|
2007-06-11 03:16:30 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outCurrentOfExpr(StringInfo str, const CurrentOfExpr *node)
|
2007-06-11 03:16:30 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("CURRENTOFEXPR");
|
|
|
|
|
|
|
|
WRITE_UINT_FIELD(cvarno);
|
|
|
|
WRITE_STRING_FIELD(cursor_name);
|
2007-06-12 00:22:42 +02:00
|
|
|
WRITE_INT_FIELD(cursor_param);
|
2007-06-11 03:16:30 +02:00
|
|
|
}
|
|
|
|
|
Code review for NextValueExpr expression node type.
Add missing infrastructure for this node type, notably in ruleutils.c where
its lack could demonstrably cause EXPLAIN to fail. Add outfuncs/readfuncs
support. (outfuncs support is useful today for debugging purposes. The
readfuncs support may never be needed, since at present it would only
matter for parallel query and NextValueExpr should never appear in a
parallelizable query; but it seems like a bad idea to have a primnode type
that isn't fully supported here.) Teach planner infrastructure that
NextValueExpr is a volatile, parallel-unsafe, non-leaky expression node
with cost cpu_operator_cost. Given its limited scope of usage, there
*might* be no live bug today from the lack of that knowledge, but it's
certainly going to bite us on the rear someday. Teach pg_stat_statements
about the new node type, too.
While at it, also teach cost_qual_eval() that MinMaxExpr, SQLValueFunction,
XmlExpr, and CoerceToDomain should be charged as cpu_operator_cost.
Failing to do this for SQLValueFunction was an oversight in my commit
0bb51aa96. The others are longer-standing oversights, but no time like the
present to fix them. (In principle, CoerceToDomain could have cost much
higher than this, but it doesn't presently seem worth trying to examine the
domain's constraints here.)
Modify execExprInterp.c to execute NextValueExpr as an out-of-line
function; it seems quite unlikely to me that it's worth insisting that
it be inlined in all expression eval methods. Besides, providing the
out-of-line function doesn't stop anyone from inlining if they want to.
Adjust some places where NextValueExpr support had been inserted with the
aid of a dartboard rather than keeping it in the same order as elsewhere.
Discussion: https://postgr.es/m/23862.1499981661@sss.pgh.pa.us
2017-07-14 21:25:43 +02:00
|
|
|
static void
|
|
|
|
_outNextValueExpr(StringInfo str, const NextValueExpr *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("NEXTVALUEEXPR");
|
|
|
|
|
|
|
|
WRITE_OID_FIELD(seqid);
|
|
|
|
WRITE_OID_FIELD(typeId);
|
|
|
|
}
|
|
|
|
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows specifying an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
static void
|
|
|
|
_outInferenceElem(StringInfo str, const InferenceElem *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("INFERENCEELEM");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(expr);
|
|
|
|
WRITE_OID_FIELD(infercollid);
|
2015-05-19 21:17:52 +02:00
|
|
|
WRITE_OID_FIELD(inferopclass);
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
static void
_outTargetEntry(StringInfo str, const TargetEntry *node)
{
	WRITE_NODE_TYPE("TARGETENTRY");

	/*
	 * Field order must match the corresponding read function in readfuncs.c
	 * (TargetEntry appears in stored rules, so it must be round-trippable).
	 */
	WRITE_NODE_FIELD(expr);
	WRITE_INT_FIELD(resno);
	WRITE_STRING_FIELD(resname);
	WRITE_UINT_FIELD(ressortgroupref);
	WRITE_OID_FIELD(resorigtbl);
	WRITE_INT_FIELD(resorigcol);
	WRITE_BOOL_FIELD(resjunk);
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2002-03-21 17:02:16 +01:00
|
|
|
static void
_outRangeTblRef(StringInfo str, const RangeTblRef *node)
{
	WRITE_NODE_TYPE("RANGETBLREF");

	/* just the index of the referenced range table entry */
	WRITE_INT_FIELD(rtindex);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
static void
_outJoinExpr(StringInfo str, const JoinExpr *node)
{
	WRITE_NODE_TYPE("JOINEXPR");

	/* field order must match the corresponding read function in readfuncs.c */
	WRITE_ENUM_FIELD(jointype, JoinType);
	WRITE_BOOL_FIELD(isNatural);
	WRITE_NODE_FIELD(larg);
	WRITE_NODE_FIELD(rarg);
	WRITE_NODE_FIELD(usingClause);
	WRITE_NODE_FIELD(quals);
	WRITE_NODE_FIELD(alias);
	WRITE_INT_FIELD(rtindex);
}
|
2002-11-25 19:12:12 +01:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
static void
_outFromExpr(StringInfo str, const FromExpr *node)
{
	WRITE_NODE_TYPE("FROMEXPR");

	/* the FROM list plus the associated WHERE-clause quals */
	WRITE_NODE_FIELD(fromlist);
	WRITE_NODE_FIELD(quals);
}
|
|
|
|
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
static void
_outOnConflictExpr(StringInfo str, const OnConflictExpr *node)
{
	WRITE_NODE_TYPE("ONCONFLICTEXPR");

	/*
	 * INSERT ... ON CONFLICT representation; field order must match the
	 * corresponding read function in readfuncs.c.
	 */
	WRITE_ENUM_FIELD(action, OnConflictAction);
	WRITE_NODE_FIELD(arbiterElems);
	WRITE_NODE_FIELD(arbiterWhere);
	WRITE_OID_FIELD(constraint);
	WRITE_NODE_FIELD(onConflictSet);
	WRITE_NODE_FIELD(onConflictWhere);
	WRITE_INT_FIELD(exclRelIndex);
	WRITE_NODE_FIELD(exclRelTlist);
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*****************************************************************************
 *
 *	Stuff from pathnodes.h.
 *
 *****************************************************************************/
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * print the basic stuff of all nodes that inherit from Path
 *
 * Note we do NOT print the parent, else we'd be in infinite recursion.
 * We can print the parent's relids for identification purposes, though.
 * We print the pathtarget only if it's not the default one for the rel.
 * We also do not print the whole of param_info, since it's printed by
 * _outRelOptInfo; it's sufficient and less cluttering to print just the
 * required outer relids.
 */
|
|
|
|
static void
_outPathInfo(StringInfo str, const Path *node)
{
	WRITE_ENUM_FIELD(pathtype, NodeTag);
	/* print only the parent's relids, never the parent (avoids recursion) */
	appendStringInfoString(str, " :parent_relids ");
	outBitmapset(str, node->parent->relids);
	/* pathtarget is printed only when it differs from the rel's default */
	if (node->pathtarget != node->parent->reltarget)
		WRITE_NODE_FIELD(pathtarget);
	/* print just the required outer relids, not the whole of param_info */
	appendStringInfoString(str, " :required_outer ");
	if (node->param_info)
		outBitmapset(str, node->param_info->ppi_req_outer);
	else
		outBitmapset(str, NULL);
	WRITE_BOOL_FIELD(parallel_aware);
	WRITE_BOOL_FIELD(parallel_safe);
	WRITE_INT_FIELD(parallel_workers);
	WRITE_FLOAT_FIELD(rows, "%.0f");
	WRITE_FLOAT_FIELD(startup_cost, "%.2f");
	WRITE_FLOAT_FIELD(total_cost, "%.2f");
	WRITE_NODE_FIELD(pathkeys);
}
|
|
|
|
|
|
|
|
/*
 * print the basic stuff of all nodes that inherit from JoinPath
 */
|
|
|
|
static void
_outJoinPathInfo(StringInfo str, const JoinPath *node)
{
	/* emit the common Path fields first */
	_outPathInfo(str, (const Path *) node);

	WRITE_ENUM_FIELD(jointype, JoinType);
	WRITE_BOOL_FIELD(inner_unique);
	WRITE_NODE_FIELD(outerjoinpath);
	WRITE_NODE_FIELD(innerjoinpath);
	WRITE_NODE_FIELD(joinrestrictinfo);
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
static void
_outPath(StringInfo str, const Path *node)
{
	WRITE_NODE_TYPE("PATH");

	/* a plain Path has nothing beyond the common fields */
	_outPathInfo(str, (const Path *) node);
}
|
|
|
|
|
2002-11-25 19:12:12 +01:00
|
|
|
static void
_outIndexPath(StringInfo str, const IndexPath *node)
{
	WRITE_NODE_TYPE("INDEXPATH");

	_outPathInfo(str, (const Path *) node);

	WRITE_NODE_FIELD(indexinfo);
	WRITE_NODE_FIELD(indexclauses);
	WRITE_NODE_FIELD(indexorderbys);
	WRITE_NODE_FIELD(indexorderbycols);
	WRITE_ENUM_FIELD(indexscandir, ScanDirection);
	WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
	WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
}
|
|
|
|
|
2005-04-20 00:35:18 +02:00
|
|
|
static void
_outBitmapHeapPath(StringInfo str, const BitmapHeapPath *node)
{
	WRITE_NODE_TYPE("BITMAPHEAPPATH");

	_outPathInfo(str, (const Path *) node);

	/* the underlying bitmap-generating path tree */
	WRITE_NODE_FIELD(bitmapqual);
}
|
|
|
|
|
2005-04-21 21:18:13 +02:00
|
|
|
static void
_outBitmapAndPath(StringInfo str, const BitmapAndPath *node)
{
	WRITE_NODE_TYPE("BITMAPANDPATH");

	_outPathInfo(str, (const Path *) node);

	WRITE_NODE_FIELD(bitmapquals);
	WRITE_FLOAT_FIELD(bitmapselectivity, "%.4f");
}
|
|
|
|
|
|
|
|
static void
_outBitmapOrPath(StringInfo str, const BitmapOrPath *node)
{
	WRITE_NODE_TYPE("BITMAPORPATH");

	_outPathInfo(str, (const Path *) node);

	WRITE_NODE_FIELD(bitmapquals);
	WRITE_FLOAT_FIELD(bitmapselectivity, "%.4f");
}
|
|
|
|
|
1999-11-23 21:07:06 +01:00
|
|
|
static void
_outTidPath(StringInfo str, const TidPath *node)
{
	WRITE_NODE_TYPE("TIDPATH");

	_outPathInfo(str, (const Path *) node);

	/* the CTID qual expressions driving the TID scan */
	WRITE_NODE_FIELD(tidquals);
}
|
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
static void
_outSubqueryScanPath(StringInfo str, const SubqueryScanPath *node)
{
	WRITE_NODE_TYPE("SUBQUERYSCANPATH");

	_outPathInfo(str, (const Path *) node);

	/* the subquery's chosen path */
	WRITE_NODE_FIELD(subpath);
}
|
|
|
|
|
2011-02-20 06:17:18 +01:00
|
|
|
static void
_outForeignPath(StringInfo str, const ForeignPath *node)
{
	WRITE_NODE_TYPE("FOREIGNPATH");

	_outPathInfo(str, (const Path *) node);

	WRITE_NODE_FIELD(fdw_outerpath);
	/* fdw_private is an FDW-defined List; printed as-is */
	WRITE_NODE_FIELD(fdw_private);
}
|
|
|
|
|
2014-11-07 23:26:02 +01:00
|
|
|
static void
_outCustomPath(StringInfo str, const CustomPath *node)
{
	WRITE_NODE_TYPE("CUSTOMPATH");

	_outPathInfo(str, (const Path *) node);

	WRITE_UINT_FIELD(flags);
	WRITE_NODE_FIELD(custom_paths);
	WRITE_NODE_FIELD(custom_private);
	/* only the method table's name is printed, not its function pointers */
	appendStringInfoString(str, " :methods ");
	outToken(str, node->methods->CustomName);
}
|
|
|
|
|
2000-11-12 01:37:02 +01:00
|
|
|
static void
_outAppendPath(StringInfo str, const AppendPath *node)
{
	WRITE_NODE_TYPE("APPENDPATH");

	_outPathInfo(str, (const Path *) node);

	WRITE_NODE_FIELD(partitioned_rels);
	WRITE_NODE_FIELD(subpaths);
	/* index of first partial subpath within subpaths */
	WRITE_INT_FIELD(first_partial_path);
	WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
}
|
|
|
|
|
2010-10-14 22:56:39 +02:00
|
|
|
static void
_outMergeAppendPath(StringInfo str, const MergeAppendPath *node)
{
	WRITE_NODE_TYPE("MERGEAPPENDPATH");

	_outPathInfo(str, (const Path *) node);

	WRITE_NODE_FIELD(partitioned_rels);
	WRITE_NODE_FIELD(subpaths);
	WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
}
|
|
|
|
|
2002-11-06 01:00:45 +01:00
|
|
|
static void
|
In the planner, replace an empty FROM clause with a dummy RTE.
The fact that "SELECT expression" has no base relations has long been a
thorn in the side of the planner. It makes it hard to flatten a sub-query
that looks like that, or is a trivial VALUES() item, because the planner
generally uses relid sets to identify sub-relations, and such a sub-query
would have an empty relid set if we flattened it. prepjointree.c contains
some baroque logic that works around this in certain special cases --- but
there is a much better answer. We can replace an empty FROM clause with a
dummy RTE that acts like a table of one row and no columns, and then there
are no such corner cases to worry about. Instead we need some logic to
get rid of useless dummy RTEs, but that's simpler and covers more cases
than what was there before.
For really trivial cases, where the query is just "SELECT expression" and
nothing else, there's a hazard that adding the extra RTE makes for a
noticeable slowdown; even though it's not much processing, there's not
that much for the planner to do overall. However testing says that the
penalty is very small, close to the noise level. In more complex queries,
this is able to find optimizations that we could not find before.
The new RTE type is called RTE_RESULT, since the "scan" plan type it
gives rise to is a Result node (the same plan we produced for a "SELECT
expression" query before). To avoid confusion, rename the old ResultPath
path type to GroupResultPath, reflecting that it's only used in degenerate
grouping cases where we know the query produces just one grouped row.
(It wouldn't work to unify the two cases, because there are different
rules about where the associated quals live during query_planner.)
Note: although this touches readfuncs.c, I don't think a catversion
bump is required, because the added case can't occur in stored rules,
only plans.
Patch by me, reviewed by David Rowley and Mark Dilger
Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
|
|
|
_outGroupResultPath(StringInfo str, const GroupResultPath *node)
|
2002-11-06 01:00:45 +01:00
|
|
|
{
|
In the planner, replace an empty FROM clause with a dummy RTE.
The fact that "SELECT expression" has no base relations has long been a
thorn in the side of the planner. It makes it hard to flatten a sub-query
that looks like that, or is a trivial VALUES() item, because the planner
generally uses relid sets to identify sub-relations, and such a sub-query
would have an empty relid set if we flattened it. prepjointree.c contains
some baroque logic that works around this in certain special cases --- but
there is a much better answer. We can replace an empty FROM clause with a
dummy RTE that acts like a table of one row and no columns, and then there
are no such corner cases to worry about. Instead we need some logic to
get rid of useless dummy RTEs, but that's simpler and covers more cases
than what was there before.
For really trivial cases, where the query is just "SELECT expression" and
nothing else, there's a hazard that adding the extra RTE makes for a
noticeable slowdown; even though it's not much processing, there's not
that much for the planner to do overall. However testing says that the
penalty is very small, close to the noise level. In more complex queries,
this is able to find optimizations that we could not find before.
The new RTE type is called RTE_RESULT, since the "scan" plan type it
gives rise to is a Result node (the same plan we produced for a "SELECT
expression" query before). To avoid confusion, rename the old ResultPath
path type to GroupResultPath, reflecting that it's only used in degenerate
grouping cases where we know the query produces just one grouped row.
(It wouldn't work to unify the two cases, because there are different
rules about where the associated quals live during query_planner.)
Note: although this touches readfuncs.c, I don't think a catversion
bump is required, because the added case can't occur in stored rules,
only plans.
Patch by me, reviewed by David Rowley and Mark Dilger
Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
|
|
|
WRITE_NODE_TYPE("GROUPRESULTPATH");
|
2002-11-06 01:00:45 +01:00
|
|
|
|
2011-12-07 20:46:56 +01:00
|
|
|
_outPathInfo(str, (const Path *) node);
|
2002-11-06 01:00:45 +01:00
|
|
|
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
WRITE_NODE_FIELD(quals);
|
2002-11-06 01:00:45 +01:00
|
|
|
}
|
|
|
|
|
2002-11-30 06:21:03 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outMaterialPath(StringInfo str, const MaterialPath *node)
|
2002-11-30 06:21:03 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("MATERIALPATH");
|
|
|
|
|
2011-12-07 20:46:56 +01:00
|
|
|
_outPathInfo(str, (const Path *) node);
|
2002-11-30 06:21:03 +01:00
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
|
|
|
}
|
|
|
|
|
2003-01-20 19:55:07 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outUniquePath(StringInfo str, const UniquePath *node)
|
2003-01-20 19:55:07 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("UNIQUEPATH");
|
|
|
|
|
2011-12-07 20:46:56 +01:00
|
|
|
_outPathInfo(str, (const Path *) node);
|
2003-01-20 19:55:07 +01:00
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
2004-01-05 19:04:39 +01:00
|
|
|
WRITE_ENUM_FIELD(umethod, UniquePathMethod);
|
2008-08-14 20:48:00 +02:00
|
|
|
WRITE_NODE_FIELD(in_operators);
|
|
|
|
WRITE_NODE_FIELD(uniq_exprs);
|
2003-01-20 19:55:07 +01:00
|
|
|
}
|
|
|
|
|
2015-11-11 12:29:03 +01:00
|
|
|
static void
|
|
|
|
_outGatherPath(StringInfo str, const GatherPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("GATHERPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
|
|
|
WRITE_BOOL_FIELD(single_copy);
|
2017-04-01 03:01:20 +02:00
|
|
|
WRITE_INT_FIELD(num_workers);
|
2015-11-11 12:29:03 +01:00
|
|
|
}
|
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
static void
|
|
|
|
_outProjectionPath(StringInfo str, const ProjectionPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("PROJECTIONPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
Refactor planning of projection steps that don't need a Result plan node.
The original upper-planner-pathification design (commit 3fc6e2d7f5b652b4)
assumed that we could always determine during Path formation whether or not
we would need a Result plan node to perform projection of a targetlist.
That turns out not to work very well, though, because createplan.c still
has some responsibilities for choosing the specific target list associated
with sorting/grouping nodes (in particular it might choose to add resjunk
columns for sorting). We might not ever refactor that --- doing so would
push more work into Path formation, which isn't attractive --- and we
certainly won't do so for 9.6. So, while create_projection_path and
apply_projection_to_path can tell for sure what will happen if the subpath
is projection-capable, they can't tell for sure when it isn't. This is at
least a latent bug in apply_projection_to_path, which might think it can
apply a target to a non-projecting node when the node will end up computing
something different.
Also, I'd tied the creation of a ProjectionPath node to whether or not a
Result is needed, but it turns out that we sometimes need a ProjectionPath
node anyway to avoid modifying a possibly-shared subpath node. Callers had
to use create_projection_path for such cases, and we added code to them
that knew about the potential omission of a Result node and attempted to
adjust the cost estimates for that. That was uncertainly correct and
definitely ugly/unmaintainable.
To fix, have create_projection_path explicitly check whether a Result
is needed and adjust its cost estimate accordingly, though it creates
a ProjectionPath in either case. apply_projection_to_path is now mostly
just an optimized version that can avoid creating an extra Path node when
the input is known to not be shared with any other live path. (There
is one case that create_projection_path doesn't handle, which is pushing
parallel-safe expressions below a Gather node. We could make it do that
by duplicating the GatherPath, but there seems no need as yet.)
create_projection_plan still has to recheck the tlist-match condition,
which means that if the matching situation does get changed by createplan.c
then we'll have made a slightly incorrect cost estimate. But there seems
no help for that in the near term, and I doubt it occurs often enough,
let alone would change planning decisions often enough, to be worth
stressing about.
I added a "dummypp" field to ProjectionPath to track whether
create_projection_path thinks a Result is needed. This is not really
necessary as-committed because create_projection_plan doesn't look at the
flag; but it seems like a good idea to remember what we thought when
forming the cost estimate, if only for debugging purposes.
In passing, get rid of the target_parallel parameter added to
apply_projection_to_path by commit 54f5c5150. I don't think that's a good
idea because it involves callers in what should be an internal decision,
and opens us up to missing optimization opportunities if callers think they
don't need to provide a valid flag, as most don't. For the moment, this
just costs us an extra has_parallel_hazard call when planning a Gather.
If that starts to look expensive, I think a better solution would be to
teach PathTarget to carry/cache knowledge of parallel-safety of its
contents.
2016-06-22 00:38:20 +02:00
|
|
|
WRITE_BOOL_FIELD(dummypp);
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
}
|
|
|
|
|
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs) in the targetlist (like SELECT
generate_series(1,5)) so far was done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of return values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would allow to retain the old
"least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided to instead only return rows till
all SRFs are exhausted, returning NULL for already exhausted ones. We
deemed the previous behavior to be too confusing, unexpected and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set returning
arguments to a function, is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed,
returning multiple rows from the expression, even when the SRF containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of, now superfluous, code dealing with set return expressions
around. But as the changes to get rid of those are verbose largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
2017-01-18 21:46:50 +01:00
|
|
|
static void
|
|
|
|
_outProjectSetPath(StringInfo str, const ProjectSetPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("PROJECTSETPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
|
|
|
}
|
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
static void
|
|
|
|
_outSortPath(StringInfo str, const SortPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("SORTPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outGroupPath(StringInfo str, const GroupPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("GROUPPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
|
|
|
WRITE_NODE_FIELD(groupClause);
|
|
|
|
WRITE_NODE_FIELD(qual);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outUpperUniquePath(StringInfo str, const UpperUniquePath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("UPPERUNIQUEPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
|
|
|
WRITE_INT_FIELD(numkeys);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outAggPath(StringInfo str, const AggPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("AGGPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
|
|
|
WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
|
2016-06-26 20:33:38 +02:00
|
|
|
WRITE_ENUM_FIELD(aggsplit, AggSplit);
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
WRITE_FLOAT_FIELD(numGroups, "%.0f");
|
|
|
|
WRITE_NODE_FIELD(groupClause);
|
|
|
|
WRITE_NODE_FIELD(qual);
|
|
|
|
}
|
|
|
|
|
2017-03-27 05:20:54 +02:00
|
|
|
static void
|
|
|
|
_outRollupData(StringInfo str, const RollupData *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("ROLLUP");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(groupClause);
|
|
|
|
WRITE_NODE_FIELD(gsets);
|
|
|
|
WRITE_NODE_FIELD(gsets_data);
|
|
|
|
WRITE_FLOAT_FIELD(numGroups, "%.0f");
|
|
|
|
WRITE_BOOL_FIELD(hashable);
|
|
|
|
WRITE_BOOL_FIELD(is_hashed);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outGroupingSetData(StringInfo str, const GroupingSetData *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("GSDATA");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(set);
|
|
|
|
WRITE_FLOAT_FIELD(numGroups, "%.0f");
|
|
|
|
}
|
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
static void
|
|
|
|
_outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("GROUPINGSETSPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
2017-03-27 05:20:54 +02:00
|
|
|
WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
|
|
|
|
WRITE_NODE_FIELD(rollups);
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
WRITE_NODE_FIELD(qual);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outMinMaxAggPath(StringInfo str, const MinMaxAggPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("MINMAXAGGPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(mmaggregates);
|
|
|
|
WRITE_NODE_FIELD(quals);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outWindowAggPath(StringInfo str, const WindowAggPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("WINDOWAGGPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
|
|
|
WRITE_NODE_FIELD(winclause);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outSetOpPath(StringInfo str, const SetOpPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("SETOPPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
|
|
|
WRITE_ENUM_FIELD(cmd, SetOpCmd);
|
|
|
|
WRITE_ENUM_FIELD(strategy, SetOpStrategy);
|
|
|
|
WRITE_NODE_FIELD(distinctList);
|
|
|
|
WRITE_INT_FIELD(flagColIdx);
|
|
|
|
WRITE_INT_FIELD(firstFlag);
|
|
|
|
WRITE_FLOAT_FIELD(numGroups, "%.0f");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outRecursiveUnionPath(StringInfo str, const RecursiveUnionPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("RECURSIVEUNIONPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(leftpath);
|
|
|
|
WRITE_NODE_FIELD(rightpath);
|
|
|
|
WRITE_NODE_FIELD(distinctList);
|
|
|
|
WRITE_INT_FIELD(wtParam);
|
|
|
|
WRITE_FLOAT_FIELD(numGroups, "%.0f");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outLockRowsPath(StringInfo str, const LockRowsPath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("LOCKROWSPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(subpath);
|
|
|
|
WRITE_NODE_FIELD(rowMarks);
|
|
|
|
WRITE_INT_FIELD(epqParam);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outModifyTablePath(StringInfo str, const ModifyTablePath *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("MODIFYTABLEPATH");
|
|
|
|
|
|
|
|
_outPathInfo(str, (const Path *) node);
|
|
|
|
|
|
|
|
WRITE_ENUM_FIELD(operation, CmdType);
|
|
|
|
WRITE_BOOL_FIELD(canSetTag);
|
|
|
|
WRITE_UINT_FIELD(nominalRelation);
|
2018-10-07 20:33:17 +02:00
|
|
|
WRITE_UINT_FIELD(rootRelation);
|
Allow UPDATE to move rows between partitions.
When an UPDATE causes a row to no longer match the partition
constraint, try to move it to a different partition where it does
match the partition constraint. In essence, the UPDATE is split into
a DELETE from the old partition and an INSERT into the new one. This
can lead to surprising behavior in concurrency scenarios because
EvalPlanQual rechecks won't work as they normally did; the known
problems are documented. (There is a pending patch to improve the
situation further, but it needs more review.)
Amit Khandekar, reviewed and tested by Amit Langote, David Rowley,
Rajkumar Raghuwanshi, Dilip Kumar, Amul Sul, Thomas Munro, Álvaro
Herrera, Amit Kapila, and me. A few final revisions by me.
Discussion: http://postgr.es/m/CAJ3gD9do9o2ccQ7j7+tSgiE1REY65XRiMb=yJO3u3QhyP8EEPQ@mail.gmail.com
2018-01-19 21:33:06 +01:00
|
|
|
WRITE_BOOL_FIELD(partColsUpdated);
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
WRITE_NODE_FIELD(resultRelations);
|
|
|
|
WRITE_NODE_FIELD(subpaths);
|
|
|
|
WRITE_NODE_FIELD(subroots);
|
|
|
|
WRITE_NODE_FIELD(withCheckOptionLists);
|
|
|
|
WRITE_NODE_FIELD(returningLists);
|
|
|
|
WRITE_NODE_FIELD(rowMarks);
|
|
|
|
WRITE_NODE_FIELD(onconflict);
|
|
|
|
WRITE_INT_FIELD(epqParam);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * _outLimitPath -
 *	  output a LimitPath node (application of LIMIT/OFFSET to a subpath).
 *
 * Path nodes are output for debugging only and have no readfuncs.c
 * counterpart; still, the field order here defines the printed format.
 */
static void
_outLimitPath(StringInfo str, const LimitPath *node)
{
	WRITE_NODE_TYPE("LIMITPATH");

	/* common Path fields (cost, row estimate, pathkeys, etc.) */
	_outPathInfo(str, (const Path *) node);

	WRITE_NODE_FIELD(subpath);		/* path producing the input rows */
	WRITE_NODE_FIELD(limitOffset);	/* OFFSET expression, or NULL if none */
	WRITE_NODE_FIELD(limitCount);	/* LIMIT count expression, or NULL if none */
}
|
|
|
|
|
2017-03-09 13:40:36 +01:00
|
|
|
/*
 * _outGatherMergePath -
 *	  output a GatherMergePath node (parallel Gather Merge: combine
 *	  sorted partial paths from workers, preserving sort order).
 */
static void
_outGatherMergePath(StringInfo str, const GatherMergePath *node)
{
	WRITE_NODE_TYPE("GATHERMERGEPATH");

	/* common Path fields */
	_outPathInfo(str, (const Path *) node);

	WRITE_NODE_FIELD(subpath);		/* partial path executed by each worker */
	WRITE_INT_FIELD(num_workers);	/* number of parallel workers planned */
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outNestPath -
 *	  output a NestPath node (nested-loop join).  All interesting state
 *	  lives in the shared JoinPath fields; there are no NestPath-specific
 *	  fields to print.
 */
static void
_outNestPath(StringInfo str, const NestPath *node)
{
	WRITE_NODE_TYPE("NESTPATH");

	/* common JoinPath fields (jointype, outer/inner paths, join quals) */
	_outJoinPathInfo(str, (const JoinPath *) node);
}
|
|
|
|
|
|
|
|
/*
 * _outMergePath -
 *	  output a MergePath node (merge join).
 */
static void
_outMergePath(StringInfo str, const MergePath *node)
{
	WRITE_NODE_TYPE("MERGEPATH");

	/* common JoinPath fields */
	_outJoinPathInfo(str, (const JoinPath *) node);

	WRITE_NODE_FIELD(path_mergeclauses);	/* clauses usable as merge quals */
	WRITE_NODE_FIELD(outersortkeys);	/* keys for explicit sort of outer side, or NIL */
	WRITE_NODE_FIELD(innersortkeys);	/* keys for explicit sort of inner side, or NIL */
	WRITE_BOOL_FIELD(skip_mark_restore);	/* true if executor may skip mark/restore */
	WRITE_BOOL_FIELD(materialize_inner);	/* true to materialize the inner path */
}
|
|
|
|
|
|
|
|
/*
 * _outHashPath -
 *	  output a HashPath node (hash join).
 */
static void
_outHashPath(StringInfo str, const HashPath *node)
{
	WRITE_NODE_TYPE("HASHPATH");

	/* common JoinPath fields */
	_outJoinPathInfo(str, (const JoinPath *) node);

	WRITE_NODE_FIELD(path_hashclauses);		/* clauses usable as hash quals */
	WRITE_INT_FIELD(num_batches);			/* expected number of hash batches */
	WRITE_FLOAT_FIELD(inner_rows_total, "%.0f");	/* total inner rows expected across batches */
}
|
|
|
|
|
2007-02-19 08:03:34 +01:00
|
|
|
/*
 * _outPlannerGlobal -
 *	  output (a useful subset of) a PlannerGlobal node, the planner state
 *	  that is shared across all query levels of one plan.  Debug-only;
 *	  there is no matching input function.
 */
static void
_outPlannerGlobal(StringInfo str, const PlannerGlobal *node)
{
	WRITE_NODE_TYPE("PLANNERGLOBAL");

	/* NB: this isn't a complete set of fields */
	WRITE_NODE_FIELD(subplans);			/* Plans for SubPlan nodes */
	WRITE_BITMAPSET_FIELD(rewindPlanIDs);	/* subplan IDs that need REWIND */
	WRITE_NODE_FIELD(finalrtable);		/* "flat" range table for the executor */
	WRITE_NODE_FIELD(finalrowmarks);	/* "flat" list of PlanRowMarks */
	WRITE_NODE_FIELD(resultRelations);	/* "flat" list of result-rel RT indexes */
	WRITE_NODE_FIELD(rootResultRelations);	/* "flat" list of partitioned-root result rels */
	WRITE_NODE_FIELD(relationOids);		/* OIDs of relations the plan depends on */
	WRITE_NODE_FIELD(invalItems);		/* other plan-invalidation dependencies */
	WRITE_NODE_FIELD(paramExecTypes);	/* type OIDs for PARAM_EXEC params */
	WRITE_UINT_FIELD(lastPHId);			/* highest PlaceHolderVar ID assigned */
	WRITE_UINT_FIELD(lastRowMarkId);	/* highest PlanRowMark ID assigned */
	WRITE_INT_FIELD(lastPlanNodeId);	/* highest plan node ID assigned */
	WRITE_BOOL_FIELD(transientPlan);	/* redo planning when TransactionXmin changes? */
	WRITE_BOOL_FIELD(dependsOnRole);	/* plan is only valid for the current role? */
	WRITE_BOOL_FIELD(parallelModeOK);	/* parallel mode potentially usable? */
	WRITE_BOOL_FIELD(parallelModeNeeded);	/* plan will run in parallel mode? */
	WRITE_CHAR_FIELD(maxParallelHazard);	/* worst parallel-safety hazard level seen */
}
|
|
|
|
|
2005-06-06 00:32:58 +02:00
|
|
|
/*
 * _outPlannerInfo -
 *	  output (a useful subset of) a PlannerInfo node, the per-query-level
 *	  planner working state.  Debug-only; there is no matching input
 *	  function, and the set of fields printed is deliberately partial.
 */
static void
_outPlannerInfo(StringInfo str, const PlannerInfo *node)
{
	WRITE_NODE_TYPE("PLANNERINFO");

	/* NB: this isn't a complete set of fields */
	WRITE_NODE_FIELD(parse);			/* the Query being planned */
	WRITE_NODE_FIELD(glob);				/* global state for this planner run */
	WRITE_UINT_FIELD(query_level);		/* 1 at the outermost Query */
	WRITE_NODE_FIELD(plan_params);		/* PlannerParamItems for outer query levels */
	WRITE_BITMAPSET_FIELD(outer_params);	/* PARAM_EXEC IDs visible from outer levels */
	WRITE_BITMAPSET_FIELD(all_baserels);	/* set of all base relids in the query */
	WRITE_BITMAPSET_FIELD(nullable_baserels);	/* baserels nullable by outer joins */
	WRITE_NODE_FIELD(join_rel_list);	/* join-relation RelOptInfos built so far */
	WRITE_INT_FIELD(join_cur_level);	/* index of list being extended */
	WRITE_NODE_FIELD(init_plans);		/* init SubPlans for this query level */
	WRITE_NODE_FIELD(cte_plan_ids);		/* per-CTE-item list of subplan IDs */
	WRITE_NODE_FIELD(multiexpr_params);	/* Params for MULTIEXPR subquery outputs */
	WRITE_NODE_FIELD(eq_classes);		/* list of active EquivalenceClasses */
	WRITE_NODE_FIELD(canon_pathkeys);	/* list of "canonical" PathKeys */
	WRITE_NODE_FIELD(left_join_clauses);	/* outer-join clauses w/ nonnullable var on left */
	WRITE_NODE_FIELD(right_join_clauses);	/* outer-join clauses w/ nonnullable var on right */
	WRITE_NODE_FIELD(full_join_clauses);	/* full-join clauses */
	WRITE_NODE_FIELD(join_info_list);	/* list of SpecialJoinInfos */
	WRITE_NODE_FIELD(append_rel_list);	/* list of AppendRelInfos */
	WRITE_NODE_FIELD(rowMarks);			/* list of PlanRowMarks */
	WRITE_NODE_FIELD(placeholder_list);	/* list of PlaceHolderInfos */
	WRITE_NODE_FIELD(fkey_list);		/* list of ForeignKeyOptInfos */
	WRITE_NODE_FIELD(query_pathkeys);	/* desired pathkeys for query_planner() */
	WRITE_NODE_FIELD(group_pathkeys);	/* pathkeys of GROUP BY, if grouping */
	WRITE_NODE_FIELD(window_pathkeys);	/* pathkeys of bottom window, if any */
	WRITE_NODE_FIELD(distinct_pathkeys);	/* pathkeys of DISTINCT, if any */
	WRITE_NODE_FIELD(sort_pathkeys);	/* pathkeys of ORDER BY, if any */
	WRITE_NODE_FIELD(processed_tlist);	/* final target list after preprocessing */
	WRITE_NODE_FIELD(minmax_aggs);		/* list of MinMaxAggInfos */
	WRITE_FLOAT_FIELD(total_table_pages, "%.0f");	/* # pages in all relations of query */
	WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");	/* tuple_fraction passed to query_planner */
	WRITE_FLOAT_FIELD(limit_tuples, "%.0f");	/* limit_tuples passed to query_planner */
	WRITE_UINT_FIELD(qual_security_level);	/* minimum security_level for quals */
	WRITE_ENUM_FIELD(inhTargetKind, InheritanceKind);	/* target-rel inheritance handling */
	WRITE_BOOL_FIELD(hasJoinRTEs);		/* true if any RTEs are RTE_JOIN */
	WRITE_BOOL_FIELD(hasLateralRTEs);	/* true if any RTEs are marked LATERAL */
	WRITE_BOOL_FIELD(hasHavingQual);	/* true if HAVING clause present */
	WRITE_BOOL_FIELD(hasPseudoConstantQuals);	/* true if any RestrictInfo is pseudoconstant */
	WRITE_BOOL_FIELD(hasRecursion);		/* true for a recursive WITH query */
	WRITE_INT_FIELD(wt_param_id);		/* PARAM_EXEC ID for the work table, if recursive */
	WRITE_BITMAPSET_FIELD(curOuterRels);	/* outer rels above the current location */
	WRITE_NODE_FIELD(curOuterParams);	/* not-yet-assigned NestLoopParams */
	WRITE_BOOL_FIELD(partColsUpdated);	/* UPDATE changes a partition key column? */
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outRelOptInfo(StringInfo str, const RelOptInfo *node)
|
2005-06-06 00:32:58 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("RELOPTINFO");
|
|
|
|
|
|
|
|
/* NB: this isn't a complete set of fields */
|
|
|
|
WRITE_ENUM_FIELD(reloptkind, RelOptKind);
|
|
|
|
WRITE_BITMAPSET_FIELD(relids);
|
|
|
|
WRITE_FLOAT_FIELD(rows, "%.0f");
|
2012-09-02 00:16:24 +02:00
|
|
|
WRITE_BOOL_FIELD(consider_startup);
|
Fix planner's cost estimation for SEMI/ANTI joins with inner indexscans.
When the inner side of a nestloop SEMI or ANTI join is an indexscan that
uses all the join clauses as indexquals, it can be presumed that both
matched and unmatched outer rows will be processed very quickly: for
matched rows, we'll stop after fetching one row from the indexscan, while
for unmatched rows we'll have an indexscan that finds no matching index
entries, which should also be quick. The planner already knew about this,
but it was nonetheless charging for at least one full run of the inner
indexscan, as a consequence of concerns about the behavior of materialized
inner scans --- but those concerns don't apply in the fast case. If the
inner side has low cardinality (many matching rows) this could make an
indexscan plan look far more expensive than it actually is. To fix,
rearrange the work in initial_cost_nestloop/final_cost_nestloop so that we
don't add the inner scan cost until we've inspected the indexquals, and
then we can add either the full-run cost or just the first tuple's cost as
appropriate.
Experimentation with this fix uncovered another problem: add_path and
friends were coded to disregard cheap startup cost when considering
parameterized paths. That's usually okay (and desirable, because it thins
the path herd faster); but in this fast case for SEMI/ANTI joins, it could
result in throwing away the desired plain indexscan path in favor of a
bitmap scan path before we ever get to the join costing logic. In the
many-matching-rows cases of interest here, a bitmap scan will do a lot more
work than required, so this is a problem. To fix, add a per-relation flag
consider_param_startup that works like the existing consider_startup flag,
but applies to parameterized paths, and set it for relations that are the
inside of a SEMI or ANTI join.
To make this patch reasonably safe to back-patch, care has been taken to
avoid changing the planner's behavior except in the very narrow case of
SEMI/ANTI joins with inner indexscans. There are places in
compare_path_costs_fuzzily and add_path_precheck that are not terribly
consistent with the new approach, but changing them will affect planner
decisions at the margins in other cases, so we'll leave that for a
HEAD-only fix.
Back-patch to 9.3; before that, the consider_startup flag didn't exist,
meaning that the second aspect of the patch would be too invasive.
Per a complaint from Peter Holzer and analysis by Tomas Vondra.
2015-06-03 17:58:47 +02:00
|
|
|
WRITE_BOOL_FIELD(consider_param_startup);
|
Generate parallel sequential scan plans in simple cases.
Add a new flag, consider_parallel, to each RelOptInfo, indicating
whether a plan for that relation could conceivably be run inside of
a parallel worker. Right now, we're pretty conservative: for example,
it might be possible to defer applying a parallel-restricted qual
in a worker, and later do it in the leader, but right now we just
don't try to parallelize access to that relation. That's probably
the right decision in most cases, anyway.
Using the new flag, generate parallel sequential scan plans for plain
baserels, meaning that we now have parallel sequential scan in
PostgreSQL. The logic here is pretty unsophisticated right now: the
costing model probably isn't right in detail, and we can't push joins
beneath Gather nodes, so the number of plans that can actually benefit
from this is pretty limited right now. Lots more work is needed.
Nevertheless, it seems time to enable this functionality so that all
this code can actually be tested easily by users and developers.
Note that, if you wish to test this functionality, it will be
necessary to set max_parallel_degree to a value greater than the
default of 0. Once a few more loose ends have been tidied up here, we
might want to consider changing the default value of this GUC, but
I'm leaving it alone for now.
Along the way, fix a bug in cost_gather: the previous coding thought
that a Gather node's transfer overhead should be costed on the basis of
the relation size rather than the number of tuples that actually need
to be passed off to the leader.
Patch by me, reviewed in earlier versions by Amit Kapila.
2015-11-11 15:02:52 +01:00
|
|
|
WRITE_BOOL_FIELD(consider_parallel);
|
2016-03-14 21:59:59 +01:00
|
|
|
WRITE_NODE_FIELD(reltarget);
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_NODE_FIELD(pathlist);
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
WRITE_NODE_FIELD(ppilist);
|
2016-01-20 20:29:22 +01:00
|
|
|
WRITE_NODE_FIELD(partial_pathlist);
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_NODE_FIELD(cheapest_startup_path);
|
|
|
|
WRITE_NODE_FIELD(cheapest_total_path);
|
|
|
|
WRITE_NODE_FIELD(cheapest_unique_path);
|
2012-01-28 01:26:38 +01:00
|
|
|
WRITE_NODE_FIELD(cheapest_parameterized_paths);
|
2015-12-11 21:52:16 +01:00
|
|
|
WRITE_BITMAPSET_FIELD(direct_lateral_relids);
|
2015-12-08 00:56:14 +01:00
|
|
|
WRITE_BITMAPSET_FIELD(lateral_relids);
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_UINT_FIELD(relid);
|
2014-12-11 09:19:50 +01:00
|
|
|
WRITE_OID_FIELD(reltablespace);
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_ENUM_FIELD(rtekind, RTEKind);
|
2006-01-31 22:39:25 +01:00
|
|
|
WRITE_INT_FIELD(min_attr);
|
|
|
|
WRITE_INT_FIELD(max_attr);
|
2012-08-27 04:48:55 +02:00
|
|
|
WRITE_NODE_FIELD(lateral_vars);
|
2013-08-18 02:22:37 +02:00
|
|
|
WRITE_BITMAPSET_FIELD(lateral_referencers);
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_NODE_FIELD(indexlist);
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 18:06:10 +01:00
|
|
|
WRITE_NODE_FIELD(statlist);
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_UINT_FIELD(pages);
|
|
|
|
WRITE_FLOAT_FIELD(tuples, "%.0f");
|
2011-10-14 23:23:01 +02:00
|
|
|
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
|
2011-09-03 21:35:12 +02:00
|
|
|
WRITE_NODE_FIELD(subroot);
|
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.
The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries. This was (probably) safe before the introduction of
CTEs, but the lazy-evalution mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE. In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.
To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter. The planning code is
simpler and probably faster than before, as well as being more correct.
Per report from Vik Reykja.
These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
|
|
|
WRITE_NODE_FIELD(subplan_params);
|
Avoid invalidating all foreign-join cached plans when user mappings change.
We must not push down a foreign join when the foreign tables involved
should be accessed under different user mappings. Previously we tried
to enforce that rule literally during planning, but that meant that the
resulting plans were dependent on the current contents of the
pg_user_mapping catalog, and we had to blow away all cached plans
containing any remote join when anything at all changed in pg_user_mapping.
This could have been improved somewhat, but the fact that a syscache inval
callback has very limited info about what changed made it hard to do better
within that design. Instead, let's change the planner to not consider user
mappings per se, but to allow a foreign join if both RTEs have the same
checkAsUser value. If they do, then they necessarily will use the same
user mapping at runtime, and we don't need to know specifically which one
that is. Post-plan-time changes in pg_user_mapping no longer require any
plan invalidation.
This rule does give up some optimization ability, to wit where two foreign
table references come from views with different owners or one's from a view
and one's directly in the query, but nonetheless the same user mapping
would have applied. We'll sacrifice the first case, but to not regress
more than we have to in the second case, allow a foreign join involving
both zero and nonzero checkAsUser values if the nonzero one is the same as
the prevailing effective userID. In that case, mark the plan as only
runnable by that userID.
The plancache code already had a notion of plans being userID-specific,
in order to support RLS. It was a little confused though, in particular
lacking clarity of thought as to whether it was the rewritten query or just
the finished plan that's dependent on the userID. Rearrange that code so
that it's clearer what depends on which, and so that the same logic applies
to both RLS-injected role dependency and foreign-join-injected role
dependency.
Note that this patch doesn't remove the other issue mentioned in the
original complaint, which is that while we'll reliably stop using a foreign
join if it's disallowed in a new context, we might fail to start using a
foreign join if it's now allowed, but we previously created a generic
cached plan that didn't use one. It was agreed that the chance of winning
that way was not high enough to justify the much larger number of plan
invalidations that would have to occur if we tried to cause it to happen.
In passing, clean up randomly-varying spelling of EXPLAIN commands in
postgres_fdw.sql, and fix a COSTS ON example that had been allowed to
leak into the committed tests.
This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the
previous attempt at ensuring we wouldn't push down foreign joins that
span permissions contexts.
Etsuro Fujita and Tom Lane
Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
|
|
|
WRITE_INT_FIELD(rel_parallel_workers);
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
WRITE_OID_FIELD(serverid);
|
Avoid invalidating all foreign-join cached plans when user mappings change.
We must not push down a foreign join when the foreign tables involved
should be accessed under different user mappings. Previously we tried
to enforce that rule literally during planning, but that meant that the
resulting plans were dependent on the current contents of the
pg_user_mapping catalog, and we had to blow away all cached plans
containing any remote join when anything at all changed in pg_user_mapping.
This could have been improved somewhat, but the fact that a syscache inval
callback has very limited info about what changed made it hard to do better
within that design. Instead, let's change the planner to not consider user
mappings per se, but to allow a foreign join if both RTEs have the same
checkAsUser value. If they do, then they necessarily will use the same
user mapping at runtime, and we don't need to know specifically which one
that is. Post-plan-time changes in pg_user_mapping no longer require any
plan invalidation.
This rule does give up some optimization ability, to wit where two foreign
table references come from views with different owners or one's from a view
and one's directly in the query, but nonetheless the same user mapping
would have applied. We'll sacrifice the first case, but to not regress
more than we have to in the second case, allow a foreign join involving
both zero and nonzero checkAsUser values if the nonzero one is the same as
the prevailing effective userID. In that case, mark the plan as only
runnable by that userID.
The plancache code already had a notion of plans being userID-specific,
in order to support RLS. It was a little confused though, in particular
lacking clarity of thought as to whether it was the rewritten query or just
the finished plan that's dependent on the userID. Rearrange that code so
that it's clearer what depends on which, and so that the same logic applies
to both RLS-injected role dependency and foreign-join-injected role
dependency.
Note that this patch doesn't remove the other issue mentioned in the
original complaint, which is that while we'll reliably stop using a foreign
join if it's disallowed in a new context, we might fail to start using a
foreign join if it's now allowed, but we previously created a generic
cached plan that didn't use one. It was agreed that the chance of winning
that way was not high enough to justify the much larger number of plan
invalidations that would have to occur if we tried to cause it to happen.
In passing, clean up randomly-varying spelling of EXPLAIN commands in
postgres_fdw.sql, and fix a COSTS ON example that had been allowed to
leak into the committed tests.
This reverts most of commits fbe5a3fb7 and 5d4171d1c, which were the
previous attempt at ensuring we wouldn't push down foreign joins that
span permissions contexts.
Etsuro Fujita and Tom Lane
Discussion: <d49c1e5b-f059-20f4-c132-e9752ee0113e@lab.ntt.co.jp>
2016-07-15 23:22:56 +02:00
|
|
|
WRITE_OID_FIELD(userid);
|
|
|
|
WRITE_BOOL_FIELD(useridiscurrent);
|
Revise FDW planning API, again.
Further reflection shows that a single callback isn't very workable if we
desire to let FDWs generate multiple Paths, because that forces the FDW to
do all work necessary to generate a valid Plan node for each Path. Instead
split the former PlanForeignScan API into three steps: GetForeignRelSize,
GetForeignPaths, GetForeignPlan. We had already bit the bullet of breaking
the 9.1 FDW API for 9.2, so this shouldn't cause very much additional pain,
and it's substantially more flexible for complex FDWs.
Add an fdw_private field to RelOptInfo so that the new functions can save
state there rather than possibly having to recalculate information two or
three times.
In addition, we'd not thought through what would be needed to allow an FDW
to set up subexpressions of its choice for runtime execution. We could
treat ForeignScan.fdw_private as an executable expression but that seems
likely to break existing FDWs unnecessarily (in particular, it would
restrict the set of node types allowable in fdw_private to those supported
by expression_tree_walker). Instead, invent a separate field fdw_exprs
which will receive the postprocessing appropriate for expression trees.
(One field is enough since it can be a list of expressions; also, we assume
the corresponding expression state tree(s) will be held within fdw_state,
so we don't need to add anything to ForeignScanState.)
Per review of Hanada Shigeru's pgsql_fdw patch. We may need to tweak this
further as we continue to work on that patch, but to me it feels a lot
closer to being right now.
2012-03-09 18:48:48 +01:00
|
|
|
/* we don't try to print fdwroutine or fdw_private */
|
2017-04-08 04:20:03 +02:00
|
|
|
/* can't print unique_for_rels/non_unique_for_rels; BMSes aren't Nodes */
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_NODE_FIELD(baserestrictinfo);
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
WRITE_UINT_FIELD(baserestrict_min_security);
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_NODE_FIELD(joininfo);
|
2007-01-20 21:45:41 +01:00
|
|
|
WRITE_BOOL_FIELD(has_eclass_joins);
|
Disable support for partitionwise joins in problematic cases.
Commit f49842d, which added support for partitionwise joins, built the
child's tlist by applying adjust_appendrel_attrs() to the parent's. So in
the case where the parent's included a whole-row Var for the parent, the
child's contained a ConvertRowtypeExpr. To cope with that, that commit
added code to the planner, such as setrefs.c, but some code paths still
assumed that the tlist for a scan (or join) rel would only include Vars
and PlaceHolderVars, which was true before that commit, causing errors:
* When creating an explicit sort node for an input path for a mergejoin
path for a child join, prepare_sort_from_pathkeys() threw the 'could not
find pathkey item to sort' error.
* When deparsing a relation participating in a pushed down child join as a
subquery in contrib/postgres_fdw, get_relation_column_alias_ids() threw
the 'unexpected expression in subquery output' error.
* When performing set_plan_references() on a local join plan generated by
contrib/postgres_fdw for EvalPlanQual support for a pushed down child
join, fix_join_expr() threw the 'variable not found in subplan target
lists' error.
To fix these, two approaches have been proposed: one by Ashutosh Bapat and
one by me. While the former keeps building the child's tlist with a
ConvertRowtypeExpr, the latter builds it with a whole-row Var for the
child not to violate the planner assumption, and tries to fix it up later,
But both approaches need more work, so refuse to generate partitionwise
join paths when whole-row Vars are involved, instead. We don't need to
handle ConvertRowtypeExprs in the child's tlists for now, so this commit
also removes the changes to the planner.
Previously, partitionwise join computed attr_needed data for each child
separately, and built the child join's tlist using that data, which also
required an extra step for adding PlaceHolderVars to that tlist, but it
would be more efficient to build it from the parent join's tlist through
the adjust_appendrel_attrs() transformation. So this commit builds that
list that way, and simplifies build_joinrel_tlist() and placeholder.c as
well as part of set_append_rel_size() to basically what they were before
partitionwise join went in.
Back-patch to PG11 where partitionwise join was introduced.
Report by Rajkumar Raghuwanshi. Analysis by Ashutosh Bapat, who also
provided some of regression tests. Patch by me, reviewed by Robert Haas.
Discussion: https://postgr.es/m/CAKcux6ktu-8tefLWtQuuZBYFaZA83vUzuRd7c1YHC-yEWyYFpg@mail.gmail.com
2018-08-31 13:34:06 +02:00
|
|
|
WRITE_BOOL_FIELD(consider_partitionwise_join);
|
2017-04-04 05:06:36 +02:00
|
|
|
WRITE_BITMAPSET_FIELD(top_parent_relids);
|
Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning. The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.
At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.
This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.
Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
2018-04-06 21:23:04 +02:00
|
|
|
WRITE_NODE_FIELD(partitioned_child_rels);
|
2005-06-06 00:32:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outIndexOptInfo(StringInfo str, const IndexOptInfo *node)
|
2005-06-06 00:32:58 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("INDEXOPTINFO");
|
|
|
|
|
|
|
|
/* NB: this isn't a complete set of fields */
|
|
|
|
WRITE_OID_FIELD(indexoid);
|
|
|
|
/* Do NOT print rel field, else infinite recursion */
|
|
|
|
WRITE_UINT_FIELD(pages);
|
|
|
|
WRITE_FLOAT_FIELD(tuples, "%.0f");
|
Redesign the planner's handling of index-descent cost estimation.
Historically we've used a couple of very ad-hoc fudge factors to try to
get the right results when indexes of different sizes would satisfy a
query with the same number of index leaf tuples being visited. In
commit 21a39de5809cd3050a37d2554323cc1d0cbeed9d I tweaked one of these
fudge factors, with results that proved disastrous for larger indexes.
Commit bf01e34b556ff37982ba2d882db424aa484c0d07 fudged it some more,
but still with not a lot of principle behind it.
What seems like a better way to address these issues is to explicitly model
index-descent costs, since that's what's really at stake when considering
diferent indexes with similar leaf-page-level costs. We tried that once
long ago, and found that charging random_page_cost per page descended
through was way too much, because upper btree levels tend to stay in cache
in real-world workloads. However, there's still CPU costs to think about,
and the previous fudge factors can be seen as a crude attempt to account
for those costs. So this patch replaces those fudge factors with explicit
charges for the number of tuple comparisons needed to descend the index
tree, plus a small charge per page touched in the descent. The cost
multipliers are chosen so that the resulting charges are in the vicinity of
the historical (pre-9.2) fudge factors for indexes of up to about a million
tuples, while not ballooning unreasonably beyond that, as the old fudge
factor did (even more so in 9.2).
To make this work accurately for btree indexes, add some code that allows
extraction of the known root-page height from a btree. There's no
equivalent number readily available for other index types, but we can use
the log of the number of index pages as an approximate substitute.
This seems like too much of a behavioral change to risk back-patching,
but it should improve matters going forward. In 9.2 I'll just revert
the fudge-factor change.
2013-01-11 18:56:58 +01:00
|
|
|
WRITE_INT_FIELD(tree_height);
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_INT_FIELD(ncolumns);
|
Redesign the planner's handling of index-descent cost estimation.
Historically we've used a couple of very ad-hoc fudge factors to try to
get the right results when indexes of different sizes would satisfy a
query with the same number of index leaf tuples being visited. In
commit 21a39de5809cd3050a37d2554323cc1d0cbeed9d I tweaked one of these
fudge factors, with results that proved disastrous for larger indexes.
Commit bf01e34b556ff37982ba2d882db424aa484c0d07 fudged it some more,
but still with not a lot of principle behind it.
What seems like a better way to address these issues is to explicitly model
index-descent costs, since that's what's really at stake when considering
diferent indexes with similar leaf-page-level costs. We tried that once
long ago, and found that charging random_page_cost per page descended
through was way too much, because upper btree levels tend to stay in cache
in real-world workloads. However, there's still CPU costs to think about,
and the previous fudge factors can be seen as a crude attempt to account
for those costs. So this patch replaces those fudge factors with explicit
charges for the number of tuple comparisons needed to descend the index
tree, plus a small charge per page touched in the descent. The cost
multipliers are chosen so that the resulting charges are in the vicinity of
the historical (pre-9.2) fudge factors for indexes of up to about a million
tuples, while not ballooning unreasonably beyond that, as the old fudge
factor did (even more so in 9.2).
To make this work accurately for btree indexes, add some code that allows
extraction of the known root-page height from a btree. There's no
equivalent number readily available for other index types, but we can use
the log of the number of index pages as an approximate substitute.
This seems like too much of a behavioral change to risk back-patching,
but it should improve matters going forward. In 9.2 I'll just revert
the fudge-factor change.
2013-01-11 18:56:58 +01:00
|
|
|
/* array fields aren't really worth the trouble to print */
|
2011-02-17 01:24:45 +01:00
|
|
|
WRITE_OID_FIELD(relam);
|
2011-10-11 20:20:06 +02:00
|
|
|
/* indexprs is redundant since we print indextlist */
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_NODE_FIELD(indpred);
|
2011-10-11 20:20:06 +02:00
|
|
|
WRITE_NODE_FIELD(indextlist);
|
Support using index-only scans with partial indexes in more cases.
Previously, the planner would reject an index-only scan if any restriction
clause for its table used a column not available from the index, even
if that restriction clause would later be dropped from the plan entirely
because it's implied by the index's predicate. This is a fairly common
situation for partial indexes because predicates using columns not included
in the index are often the most useful kind of predicate, and we have to
duplicate (or at least imply) the predicate in the WHERE clause in order
to get the index to be considered at all. So index-only scans were
essentially unavailable with such partial indexes.
To fix, we have to do detection of implied-by-predicate clauses much
earlier in the planner. This patch puts it in check_index_predicates
(nee check_partial_indexes), meaning it gets done for every partial index,
whereas we previously only considered this issue at createplan time,
so that the work was only done for an index actually selected for use.
That could result in a noticeable planning slowdown for queries against
tables with many partial indexes. However, testing suggested that there
isn't really a significant cost, especially not with reasonable numbers
of partial indexes. We do get a small additional benefit, which is that
cost_index is more accurate since it correctly discounts the evaluation
cost of clauses that will be removed. We can also avoid considering such
clauses as potential indexquals, which saves useless matching cycles in
the case where the predicate columns aren't in the index, and prevents
generating bogus plans that double-count the clause's selectivity when
the columns are in the index.
Tomas Vondra and Kyotaro Horiguchi, reviewed by Kevin Grittner and
Konstantin Knizhnik, and whacked around a little by me
2016-03-31 20:48:56 +02:00
|
|
|
WRITE_NODE_FIELD(indrestrictinfo);
|
2005-06-06 00:32:58 +02:00
|
|
|
WRITE_BOOL_FIELD(predOK);
|
|
|
|
WRITE_BOOL_FIELD(unique);
|
2011-10-23 06:43:39 +02:00
|
|
|
WRITE_BOOL_FIELD(immediate);
|
2011-02-17 01:24:45 +01:00
|
|
|
WRITE_BOOL_FIELD(hypothetical);
|
Restructure index access method API to hide most of it at the C level.
This patch reduces pg_am to just two columns, a name and a handler
function. All the data formerly obtained from pg_am is now provided
in a C struct returned by the handler function. This is similar to
the designs we've adopted for FDWs and tablesample methods. There
are multiple advantages. For one, the index AM's support functions
are now simple C functions, making them faster to call and much less
error-prone, since the C compiler can now check function signatures.
For another, this will make it far more practical to define index access
methods in installable extensions.
A disadvantage is that SQL-level code can no longer see attributes
of index AMs; in particular, some of the crosschecks in the opr_sanity
regression test are no longer possible from SQL. We've addressed that
by adding a facility for the index AM to perform such checks instead.
(Much more could be done in that line, but for now we're content if the
amvalidate functions more or less replace what opr_sanity used to do.)
We might also want to expose some sort of reporting functionality, but
this patch doesn't do that.
Alexander Korotkov, reviewed by Petr Jelínek, and rather heavily
editorialized on by me.
2016-01-18 01:36:59 +01:00
|
|
|
/* we don't bother with fields copied from the index AM's API struct */
|
2005-06-06 00:32:58 +02:00
|
|
|
}
|
|
|
|
|
2016-06-18 21:22:34 +02:00
|
|
|
static void
|
|
|
|
_outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
WRITE_NODE_TYPE("FOREIGNKEYOPTINFO");
|
|
|
|
|
|
|
|
WRITE_UINT_FIELD(con_relid);
|
|
|
|
WRITE_UINT_FIELD(ref_relid);
|
|
|
|
WRITE_INT_FIELD(nkeys);
|
2018-12-22 06:53:37 +01:00
|
|
|
WRITE_ATTRNUMBER_ARRAY(conkey, node->nkeys);
|
|
|
|
WRITE_ATTRNUMBER_ARRAY(confkey, node->nkeys);
|
|
|
|
WRITE_OID_ARRAY(conpfeqop, node->nkeys);
|
2016-06-18 21:22:34 +02:00
|
|
|
WRITE_INT_FIELD(nmatched_ec);
|
|
|
|
WRITE_INT_FIELD(nmatched_rcols);
|
|
|
|
WRITE_INT_FIELD(nmatched_ri);
|
|
|
|
/* for compactness, just print the number of matches per column: */
|
|
|
|
appendStringInfoString(str, " :eclass");
|
|
|
|
for (i = 0; i < node->nkeys; i++)
|
|
|
|
appendStringInfo(str, " %d", (node->eclass[i] != NULL));
|
|
|
|
appendStringInfoString(str, " :rinfos");
|
|
|
|
for (i = 0; i < node->nkeys; i++)
|
|
|
|
appendStringInfo(str, " %d", list_length(node->rinfos[i]));
|
|
|
|
}
|
|
|
|
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 18:06:10 +01:00
|
|
|
/*
 * Output function for StatisticExtInfo, the planner's per-relation summary
 * of an extended-statistics object (CREATE STATISTICS).
 */
static void
_outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node)
{
	WRITE_NODE_TYPE("STATISTICEXTINFO");

	/* NB: this isn't a complete set of fields */
	WRITE_OID_FIELD(statOid);

	/* don't write rel, leads to infinite recursion in plan tree dump */

	WRITE_CHAR_FIELD(kind);
	WRITE_BITMAPSET_FIELD(keys);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * Output function for EquivalenceClass.
 *
 * Field order here must match the struct declaration; each WRITE_* macro
 * appends one ":fieldname value" item to the output string.
 */
static void
_outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
{
	/*
	 * To simplify reading, we just chase up to the topmost merged EC and
	 * print that, without bothering to show the merge-ees separately.
	 */
	while (node->ec_merged)
		node = node->ec_merged;

	WRITE_NODE_TYPE("EQUIVALENCECLASS");

	WRITE_NODE_FIELD(ec_opfamilies);
	WRITE_OID_FIELD(ec_collation);
	WRITE_NODE_FIELD(ec_members);
	WRITE_NODE_FIELD(ec_sources);
	WRITE_NODE_FIELD(ec_derives);
	WRITE_BITMAPSET_FIELD(ec_relids);
	WRITE_BOOL_FIELD(ec_has_const);
	WRITE_BOOL_FIELD(ec_has_volatile);
	WRITE_BOOL_FIELD(ec_below_outer_join);
	WRITE_BOOL_FIELD(ec_broken);
	WRITE_UINT_FIELD(ec_sortref);
	WRITE_UINT_FIELD(ec_min_security);
	WRITE_UINT_FIELD(ec_max_security);
}
|
|
|
|
|
|
|
|
/*
 * Output function for EquivalenceMember (one member expression of an
 * EquivalenceClass).
 */
static void
_outEquivalenceMember(StringInfo str, const EquivalenceMember *node)
{
	WRITE_NODE_TYPE("EQUIVALENCEMEMBER");

	WRITE_NODE_FIELD(em_expr);
	WRITE_BITMAPSET_FIELD(em_relids);
	WRITE_BITMAPSET_FIELD(em_nullable_relids);
	WRITE_BOOL_FIELD(em_is_const);
	WRITE_BOOL_FIELD(em_is_child);
	WRITE_OID_FIELD(em_datatype);
}
|
|
|
|
|
|
|
|
/*
 * Output function for PathKey (one element of a path's sort ordering).
 */
static void
_outPathKey(StringInfo str, const PathKey *node)
{
	WRITE_NODE_TYPE("PATHKEY");

	WRITE_NODE_FIELD(pk_eclass);
	WRITE_OID_FIELD(pk_opfamily);
	WRITE_INT_FIELD(pk_strategy);
	WRITE_BOOL_FIELD(pk_nulls_first);
}
|
|
|
|
|
2016-03-14 21:59:59 +01:00
|
|
|
static void
|
|
|
|
_outPathTarget(StringInfo str, const PathTarget *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("PATHTARGET");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(exprs);
|
|
|
|
if (node->sortgrouprefs)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
appendStringInfoString(str, " :sortgrouprefs");
|
|
|
|
for (i = 0; i < list_length(node->exprs); i++)
|
|
|
|
appendStringInfo(str, " %u", node->sortgrouprefs[i]);
|
|
|
|
}
|
|
|
|
WRITE_FLOAT_FIELD(cost.startup, "%.2f");
|
|
|
|
WRITE_FLOAT_FIELD(cost.per_tuple, "%.2f");
|
|
|
|
WRITE_INT_FIELD(width);
|
|
|
|
}
|
|
|
|
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
/*
 * Output function for ParamPathInfo (parameterization info shared by paths
 * with the same set of required outer rels).
 */
static void
_outParamPathInfo(StringInfo str, const ParamPathInfo *node)
{
	WRITE_NODE_TYPE("PARAMPATHINFO");

	WRITE_BITMAPSET_FIELD(ppi_req_outer);
	WRITE_FLOAT_FIELD(ppi_rows, "%.0f");
	WRITE_NODE_FIELD(ppi_clauses);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * Output function for RestrictInfo (a qualification clause plus the
 * planner's cached information about it).
 *
 * Several cached-link fields are deliberately omitted because they point
 * back into the EquivalenceClass structures and would cause unbounded
 * recursion when dumping a plan tree.
 */
static void
_outRestrictInfo(StringInfo str, const RestrictInfo *node)
{
	WRITE_NODE_TYPE("RESTRICTINFO");

	/* NB: this isn't a complete set of fields */
	WRITE_NODE_FIELD(clause);
	WRITE_BOOL_FIELD(is_pushed_down);
	WRITE_BOOL_FIELD(outerjoin_delayed);
	WRITE_BOOL_FIELD(can_join);
	WRITE_BOOL_FIELD(pseudoconstant);
	WRITE_BOOL_FIELD(leakproof);
	WRITE_UINT_FIELD(security_level);
	WRITE_BITMAPSET_FIELD(clause_relids);
	WRITE_BITMAPSET_FIELD(required_relids);
	WRITE_BITMAPSET_FIELD(outer_relids);
	WRITE_BITMAPSET_FIELD(nullable_relids);
	WRITE_BITMAPSET_FIELD(left_relids);
	WRITE_BITMAPSET_FIELD(right_relids);
	WRITE_NODE_FIELD(orclause);
	/* don't write parent_ec, leads to infinite recursion in plan tree dump */
	WRITE_FLOAT_FIELD(norm_selec, "%.4f");
	WRITE_FLOAT_FIELD(outer_selec, "%.4f");
	WRITE_NODE_FIELD(mergeopfamilies);
	/* don't write left_ec, leads to infinite recursion in plan tree dump */
	/* don't write right_ec, leads to infinite recursion in plan tree dump */
	WRITE_NODE_FIELD(left_em);
	WRITE_NODE_FIELD(right_em);
	WRITE_BOOL_FIELD(outer_is_left);
	WRITE_OID_FIELD(hashjoinoperator);
}
|
|
|
|
|
Refactor the representation of indexable clauses in IndexPaths.
In place of three separate but interrelated lists (indexclauses,
indexquals, and indexqualcols), an IndexPath now has one list
"indexclauses" of IndexClause nodes. This holds basically the same
information as before, but in a more useful format: in particular, there
is now a clear connection between an indexclause (an original restriction
clause from WHERE or JOIN/ON) and the indexquals (directly usable index
conditions) derived from it.
We also change the ground rules a bit by mandating that clause commutation,
if needed, be done up-front so that what is stored in the indexquals list
is always directly usable as an index condition. This gets rid of repeated
re-determination of which side of the clause is the indexkey during costing
and plan generation, as well as repeated lookups of the commutator
operator. To minimize the added up-front cost, the typical case of
commuting a plain OpExpr is handled by a new special-purpose function
commute_restrictinfo(). For RowCompareExprs, generating the new clause
properly commuted to begin with is not really any more complex than before,
it's just different --- and we can save doing that work twice, as the
pretty-klugy original implementation did.
Tracking the connection between original and derived clauses lets us
also track explicitly whether the derived clauses are an exact or lossy
translation of the original. This provides a cheap solution to getting
rid of unnecessary rechecks of boolean index clauses, which previously
seemed like it'd be more expensive than it was worth.
Another pleasant (IMO) side-effect is that EXPLAIN now always shows
index clauses with the indexkey on the left; this seems less confusing.
This commit leaves expand_indexqual_conditions() and some related
functions in a slightly messy state. I didn't bother to change them
any more than minimally necessary to work with the new data structure,
because all that code is going to be refactored out of existence in
a follow-on patch.
Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
|
|
|
/*
 * Output function for IndexClause, which ties an original restriction
 * clause (rinfo) to the directly-usable index conditions (indexquals)
 * derived from it.
 */
static void
_outIndexClause(StringInfo str, const IndexClause *node)
{
	WRITE_NODE_TYPE("INDEXCLAUSE");

	WRITE_NODE_FIELD(rinfo);
	WRITE_NODE_FIELD(indexquals);
	WRITE_BOOL_FIELD(lossy);
	WRITE_INT_FIELD(indexcol);
	WRITE_NODE_FIELD(indexcols);
}
|
|
|
|
|
2008-10-21 22:42:53 +02:00
|
|
|
/*
 * Output function for PlaceHolderVar (an expression that must be evaluated
 * at a particular join level, identified by phid/phlevelsup).
 */
static void
_outPlaceHolderVar(StringInfo str, const PlaceHolderVar *node)
{
	WRITE_NODE_TYPE("PLACEHOLDERVAR");

	WRITE_NODE_FIELD(phexpr);
	WRITE_BITMAPSET_FIELD(phrels);
	WRITE_UINT_FIELD(phid);
	WRITE_UINT_FIELD(phlevelsup);
}
|
|
|
|
|
2008-08-14 20:48:00 +02:00
|
|
|
/*
 * Output function for SpecialJoinInfo (planner info about one outer join
 * or semijoin that constrains join ordering).
 */
static void
_outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node)
{
	WRITE_NODE_TYPE("SPECIALJOININFO");

	WRITE_BITMAPSET_FIELD(min_lefthand);
	WRITE_BITMAPSET_FIELD(min_righthand);
	WRITE_BITMAPSET_FIELD(syn_lefthand);
	WRITE_BITMAPSET_FIELD(syn_righthand);
	WRITE_ENUM_FIELD(jointype, JoinType);
	WRITE_BOOL_FIELD(lhs_strict);
	WRITE_BOOL_FIELD(delay_upper_joins);
	WRITE_BOOL_FIELD(semi_can_btree);
	WRITE_BOOL_FIELD(semi_can_hash);
	WRITE_NODE_FIELD(semi_operators);
	WRITE_NODE_FIELD(semi_rhs_exprs);
}
|
|
|
|
|
2006-01-31 22:39:25 +01:00
|
|
|
/*
 * Output function for AppendRelInfo (mapping between an appendrel parent
 * and one of its child relations).
 */
static void
_outAppendRelInfo(StringInfo str, const AppendRelInfo *node)
{
	WRITE_NODE_TYPE("APPENDRELINFO");

	WRITE_UINT_FIELD(parent_relid);
	WRITE_UINT_FIELD(child_relid);
	WRITE_OID_FIELD(parent_reltype);
	WRITE_OID_FIELD(child_reltype);
	WRITE_NODE_FIELD(translated_vars);
	WRITE_OID_FIELD(parent_reloid);
}
|
|
|
|
|
2008-10-21 22:42:53 +02:00
|
|
|
/*
 * Output function for PlaceHolderInfo (per-query bookkeeping for one
 * PlaceHolderVar, keyed by phid).
 */
static void
_outPlaceHolderInfo(StringInfo str, const PlaceHolderInfo *node)
{
	WRITE_NODE_TYPE("PLACEHOLDERINFO");

	WRITE_UINT_FIELD(phid);
	WRITE_NODE_FIELD(ph_var);
	WRITE_BITMAPSET_FIELD(ph_eval_at);
	WRITE_BITMAPSET_FIELD(ph_lateral);
	WRITE_BITMAPSET_FIELD(ph_needed);
	WRITE_INT_FIELD(ph_width);
}
|
|
|
|
|
2010-11-04 17:01:17 +01:00
|
|
|
/*
 * Output function for MinMaxAggInfo (info about one MIN/MAX aggregate that
 * the planner may implement via an index scan).
 */
static void
_outMinMaxAggInfo(StringInfo str, const MinMaxAggInfo *node)
{
	WRITE_NODE_TYPE("MINMAXAGGINFO");

	WRITE_OID_FIELD(aggfnoid);
	WRITE_OID_FIELD(aggsortop);
	WRITE_NODE_FIELD(target);
	/* We intentionally omit subroot --- too large, not interesting enough */
	WRITE_NODE_FIELD(path);
	WRITE_FLOAT_FIELD(pathcost, "%.2f");
	WRITE_NODE_FIELD(param);
}
|
|
|
|
|
2007-02-19 08:03:34 +01:00
|
|
|
/*
 * Output function for PlannerParamItem (one outer-query value passed down
 * to a subquery via a PARAM_EXEC slot).
 */
static void
_outPlannerParamItem(StringInfo str, const PlannerParamItem *node)
{
	WRITE_NODE_TYPE("PLANNERPARAMITEM");

	WRITE_NODE_FIELD(item);
	WRITE_INT_FIELD(paramId);
}
|
|
|
|
|
Introduce extensible node types.
An extensible node is always tagged T_Extensible, but the extnodename
field identifies it more specifically; it may also include arbitrary
private data. Extensible nodes can be copied, tested for equality,
serialized, and deserialized, but the core system doesn't know
anything about them otherwise. Some extensions may find it useful to
include these nodes in fdw_private or custom_private lists in lieu of
arm-wrestling their data into a format that the core code can
understand.
Along the way, so as not to burden the authors of such extensible
node types too much, expose the functions for writing serialized
tokens, and for serializing and deserializing bitmapsets.
KaiGai Kohei, per a design suggested by me. Reviewed by Andres Freund
and by me, and further edited by me.
2016-02-12 15:31:16 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
*
|
|
|
|
* Stuff from extensible.h
|
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/*
 * Output function for ExtensibleNode.  The core code emits only the node
 * type name; all private fields are serialized by the extension's own
 * nodeOut callback.
 */
static void
_outExtensibleNode(StringInfo str, const ExtensibleNode *node)
{
	const ExtensibleNodeMethods *methods;

	/*
	 * NOTE(review): second argument presumably is missing_ok=false, so an
	 * unregistered extnodename errors out here rather than returning NULL
	 * --- confirm against extensible.c.
	 */
	methods = GetExtensibleNodeMethods(node->extnodename, false);

	WRITE_NODE_TYPE("EXTENSIBLENODE");

	WRITE_STRING_FIELD(extnodename);

	/* serialize the private fields */
	methods->nodeOut(str, node);
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
*
|
|
|
|
* Stuff from parsenodes.h.
|
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
2012-04-18 16:43:16 +02:00
|
|
|
/*
 * print the basic stuff of all nodes that inherit from CreateStmt
 *
 * Shared helper: emits the CreateStmt fields common to CREATE TABLE and
 * its derived statements (e.g. CREATE FOREIGN TABLE).  Callers emit their
 * own node-type tag first, then invoke this, then append subtype fields.
 */
static void
_outCreateStmtInfo(StringInfo str, const CreateStmt *node)
{
	WRITE_NODE_FIELD(relation);
	WRITE_NODE_FIELD(tableElts);
	WRITE_NODE_FIELD(inhRelations);
	WRITE_NODE_FIELD(partspec);
	WRITE_NODE_FIELD(partbound);
	WRITE_NODE_FIELD(ofTypename);
	WRITE_NODE_FIELD(constraints);
	WRITE_NODE_FIELD(options);
	WRITE_ENUM_FIELD(oncommit, OnCommitAction);
	WRITE_STRING_FIELD(tablespacename);
	WRITE_STRING_FIELD(accessMethod);
	WRITE_BOOL_FIELD(if_not_exists);
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2012-04-18 16:43:16 +02:00
|
|
|
/*
 * Output function for CreateStmt (CREATE TABLE); all fields are shared with
 * derived statement types, so the common helper does the work.
 */
static void
_outCreateStmt(StringInfo str, const CreateStmt *node)
{
	WRITE_NODE_TYPE("CREATESTMT");

	_outCreateStmtInfo(str, (const CreateStmt *) node);
}
|
|
|
|
|
2011-01-02 05:48:11 +01:00
|
|
|
/*
 * Output function for CreateForeignTableStmt: the common CreateStmt fields
 * followed by the foreign-table-specific server name and options.
 */
static void
_outCreateForeignTableStmt(StringInfo str, const CreateForeignTableStmt *node)
{
	WRITE_NODE_TYPE("CREATEFOREIGNTABLESTMT");

	/* upcast is safe: CreateForeignTableStmt embeds CreateStmt as its base */
	_outCreateStmtInfo(str, (const CreateStmt *) node);

	WRITE_STRING_FIELD(servername);
	WRITE_NODE_FIELD(options);
}
|
|
|
|
|
2014-07-10 21:01:31 +02:00
|
|
|
/*
 * Output function for ImportForeignSchemaStmt (IMPORT FOREIGN SCHEMA).
 */
static void
_outImportForeignSchemaStmt(StringInfo str, const ImportForeignSchemaStmt *node)
{
	WRITE_NODE_TYPE("IMPORTFOREIGNSCHEMASTMT");

	WRITE_STRING_FIELD(server_name);
	WRITE_STRING_FIELD(remote_schema);
	WRITE_STRING_FIELD(local_schema);
	WRITE_ENUM_FIELD(list_type, ImportForeignSchemaType);
	WRITE_NODE_FIELD(table_list);
	WRITE_NODE_FIELD(options);
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * Output function for IndexStmt (CREATE INDEX).
 *
 * Note: field order here is part of the serialized format; keep it in sync
 * with the struct declaration in parsenodes.h.
 */
static void
_outIndexStmt(StringInfo str, const IndexStmt *node)
{
	WRITE_NODE_TYPE("INDEXSTMT");

	WRITE_STRING_FIELD(idxname);
	WRITE_NODE_FIELD(relation);
	WRITE_STRING_FIELD(accessMethod);
	WRITE_STRING_FIELD(tableSpace);
	WRITE_NODE_FIELD(indexParams);
	WRITE_NODE_FIELD(indexIncludingParams);
	WRITE_NODE_FIELD(options);
	WRITE_NODE_FIELD(whereClause);
	WRITE_NODE_FIELD(excludeOpNames);
	/* idxcomment: internal-only comment carried for CREATE TABLE LIKE */
	WRITE_STRING_FIELD(idxcomment);
	WRITE_OID_FIELD(indexOid);
	WRITE_OID_FIELD(oldNode);
	WRITE_BOOL_FIELD(unique);
	WRITE_BOOL_FIELD(primary);
	WRITE_BOOL_FIELD(isconstraint);
	WRITE_BOOL_FIELD(deferrable);
	WRITE_BOOL_FIELD(initdeferred);
	/* transformed: true if parse analysis has already been applied */
	WRITE_BOOL_FIELD(transformed);
	WRITE_BOOL_FIELD(concurrent);
	WRITE_BOOL_FIELD(if_not_exists);
	WRITE_BOOL_FIELD(reset_default_tblspc);
}
|
|
|
|
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 18:06:10 +01:00
|
|
|
/*
 * Output function for CreateStatsStmt (CREATE STATISTICS).
 */
static void
_outCreateStatsStmt(StringInfo str, const CreateStatsStmt *node)
{
	WRITE_NODE_TYPE("CREATESTATSSTMT");

	WRITE_NODE_FIELD(defnames);
	WRITE_NODE_FIELD(stat_types);
	WRITE_NODE_FIELD(exprs);
	WRITE_NODE_FIELD(relations);
	WRITE_STRING_FIELD(stxcomment);
	WRITE_BOOL_FIELD(if_not_exists);
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * Output function for NotifyStmt (NOTIFY): channel name plus optional payload.
 */
static void
_outNotifyStmt(StringInfo str, const NotifyStmt *node)
{
	WRITE_NODE_TYPE("NOTIFY");

	WRITE_STRING_FIELD(conditionname);
	WRITE_STRING_FIELD(payload);
}
|
|
|
|
|
2003-03-10 04:53:52 +01:00
|
|
|
/*
 * Output function for DeclareCursorStmt (DECLARE CURSOR).
 */
static void
_outDeclareCursorStmt(StringInfo str, const DeclareCursorStmt *node)
{
	WRITE_NODE_TYPE("DECLARECURSOR");

	WRITE_STRING_FIELD(portalname);
	/* options: cursor option flag bits, written as a plain int */
	WRITE_INT_FIELD(options);
	WRITE_NODE_FIELD(query);
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * Output function for SelectStmt, a raw (pre-analysis) SELECT parse tree.
 *
 * The set-operation fields (op/all/larg/rarg) come last; larg/rarg are
 * themselves SelectStmt subtrees for UNION/INTERSECT/EXCEPT constructs.
 */
static void
_outSelectStmt(StringInfo str, const SelectStmt *node)
{
	WRITE_NODE_TYPE("SELECT");

	WRITE_NODE_FIELD(distinctClause);
	WRITE_NODE_FIELD(intoClause);
	WRITE_NODE_FIELD(targetList);
	WRITE_NODE_FIELD(fromClause);
	WRITE_NODE_FIELD(whereClause);
	WRITE_NODE_FIELD(groupClause);
	WRITE_NODE_FIELD(havingClause);
	WRITE_NODE_FIELD(windowClause);
	WRITE_NODE_FIELD(valuesLists);
	WRITE_NODE_FIELD(sortClause);
	WRITE_NODE_FIELD(limitOffset);
	WRITE_NODE_FIELD(limitCount);
	WRITE_NODE_FIELD(lockingClause);
	WRITE_NODE_FIELD(withClause);
	WRITE_ENUM_FIELD(op, SetOperation);
	WRITE_BOOL_FIELD(all);
	WRITE_NODE_FIELD(larg);
	WRITE_NODE_FIELD(rarg);
}
|
|
|
|
|
|
|
|
/*
 * Output function for FuncCall, a raw-grammar function/aggregate invocation
 * (including aggregate decoration: ORDER BY, FILTER, WITHIN GROUP, OVER).
 */
static void
_outFuncCall(StringInfo str, const FuncCall *node)
{
	WRITE_NODE_TYPE("FUNCCALL");

	WRITE_NODE_FIELD(funcname);
	WRITE_NODE_FIELD(args);
	WRITE_NODE_FIELD(agg_order);
	WRITE_NODE_FIELD(agg_filter);
	WRITE_BOOL_FIELD(agg_within_group);
	WRITE_BOOL_FIELD(agg_star);
	WRITE_BOOL_FIELD(agg_distinct);
	WRITE_BOOL_FIELD(func_variadic);
	WRITE_NODE_FIELD(over);
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
2004-06-09 21:08:20 +02:00
|
|
|
/*
 * Output function for DefElem, a generic name = value option element.
 */
static void
_outDefElem(StringInfo str, const DefElem *node)
{
	WRITE_NODE_TYPE("DEFELEM");

	WRITE_STRING_FIELD(defnamespace);
	WRITE_STRING_FIELD(defname);
	WRITE_NODE_FIELD(arg);
	WRITE_ENUM_FIELD(defaction, DefElemAction);
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
2010-11-13 06:34:45 +01:00
|
|
|
/*
 * Output function for TableLikeClause (CREATE TABLE ... LIKE source_table).
 */
static void
_outTableLikeClause(StringInfo str, const TableLikeClause *node)
{
	WRITE_NODE_TYPE("TABLELIKECLAUSE");

	WRITE_NODE_FIELD(relation);
	/* options: INCLUDING/EXCLUDING bitmask, hence unsigned output */
	WRITE_UINT_FIELD(options);
}
|
|
|
|
|
2005-08-01 22:31:16 +02:00
|
|
|
/*
 * Output function for LockingClause (FOR UPDATE/SHARE/KEY SHARE/NO KEY UPDATE).
 */
static void
_outLockingClause(StringInfo str, const LockingClause *node)
{
	WRITE_NODE_TYPE("LOCKINGCLAUSE");

	WRITE_NODE_FIELD(lockedRels);
	WRITE_ENUM_FIELD(strength, LockClauseStrength);
	/* waitPolicy: NOWAIT / SKIP LOCKED / block (the default) */
	WRITE_ENUM_FIELD(waitPolicy, LockWaitPolicy);
}
|
|
|
|
|
2007-02-03 15:06:56 +01:00
|
|
|
/*
 * Output function for XmlSerialize (XMLSERIALIZE(... AS type)).
 */
static void
_outXmlSerialize(StringInfo str, const XmlSerialize *node)
{
	WRITE_NODE_TYPE("XMLSERIALIZE");

	WRITE_ENUM_FIELD(xmloption, XmlOptionType);
	WRITE_NODE_FIELD(expr);
	WRITE_NODE_FIELD(typeName);
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
2016-11-04 16:49:50 +01:00
|
|
|
/*
 * Output function for TriggerTransition, a transition-table clause in
 * CREATE TRIGGER (REFERENCING OLD/NEW TABLE/ROW AS name).
 */
static void
_outTriggerTransition(StringInfo str, const TriggerTransition *node)
{
	WRITE_NODE_TYPE("TRIGGERTRANSITION");

	WRITE_STRING_FIELD(name);
	WRITE_BOOL_FIELD(isNew);
	WRITE_BOOL_FIELD(isTable);
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outColumnDef(StringInfo str, const ColumnDef *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("COLUMNDEF");
|
|
|
|
|
|
|
|
WRITE_STRING_FIELD(colname);
|
2009-07-16 08:33:46 +02:00
|
|
|
WRITE_NODE_FIELD(typeName);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_INT_FIELD(inhcount);
|
|
|
|
WRITE_BOOL_FIELD(is_local);
|
|
|
|
WRITE_BOOL_FIELD(is_not_null);
|
Remove collation information from TypeName, where it does not belong.
The initial collations patch treated a COLLATE spec as part of a TypeName,
following what can only be described as brain fade on the part of the SQL
committee. It's a lot more reasonable to treat COLLATE as a syntactically
separate object, so that it can be added in only the productions where it
actually belongs, rather than needing to reject it in a boatload of places
where it doesn't belong (something the original patch mostly failed to do).
In addition this change lets us meet the spec's requirement to allow
COLLATE anywhere in the clauses of a ColumnDef, and it avoids unfriendly
behavior for constructs such as "foo::type COLLATE collation".
To do this, pull collation information out of TypeName and put it in
ColumnDef instead, thus reverting most of the collation-related changes in
parse_type.c's API. I made one additional structural change, which was to
use a ColumnDef as an intermediate node in AT_AlterColumnType AlterTableCmd
nodes. This provides enough room to get rid of the "transform" wart in
AlterTableCmd too, since the ColumnDef can carry the USING expression
easily enough.
Also fix some other minor bugs that have crept in in the same areas,
like failure to copy recently-added fields of ColumnDef in copyfuncs.c.
While at it, document the formerly secret ability to specify a collation
in ALTER TABLE ALTER COLUMN TYPE, ALTER TYPE ADD ATTRIBUTE, and
ALTER TYPE ALTER ATTRIBUTE TYPE; and correct some misstatements about
what the default collation selection will be when COLLATE is omitted.
BTW, the three-parameter form of format_type() should go away too,
since it just contributes to the confusion in this area; but I'll do
that in a separate patch.
2011-03-10 04:38:52 +01:00
|
|
|
WRITE_BOOL_FIELD(is_from_type);
|
2012-04-18 16:43:16 +02:00
|
|
|
WRITE_CHAR_FIELD(storage);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(raw_default);
|
2009-10-06 02:55:26 +02:00
|
|
|
WRITE_NODE_FIELD(cooked_default);
|
2017-04-06 14:33:16 +02:00
|
|
|
WRITE_CHAR_FIELD(identity);
|
2018-02-02 20:20:50 +01:00
|
|
|
WRITE_NODE_FIELD(identitySequence);
|
2019-03-30 08:13:09 +01:00
|
|
|
WRITE_CHAR_FIELD(generated);
|
Remove collation information from TypeName, where it does not belong.
The initial collations patch treated a COLLATE spec as part of a TypeName,
following what can only be described as brain fade on the part of the SQL
committee. It's a lot more reasonable to treat COLLATE as a syntactically
separate object, so that it can be added in only the productions where it
actually belongs, rather than needing to reject it in a boatload of places
where it doesn't belong (something the original patch mostly failed to do).
In addition this change lets us meet the spec's requirement to allow
COLLATE anywhere in the clauses of a ColumnDef, and it avoids unfriendly
behavior for constructs such as "foo::type COLLATE collation".
To do this, pull collation information out of TypeName and put it in
ColumnDef instead, thus reverting most of the collation-related changes in
parse_type.c's API. I made one additional structural change, which was to
use a ColumnDef as an intermediate node in AT_AlterColumnType AlterTableCmd
nodes. This provides enough room to get rid of the "transform" wart in
AlterTableCmd too, since the ColumnDef can carry the USING expression
easily enough.
Also fix some other minor bugs that have crept in in the same areas,
like failure to copy recently-added fields of ColumnDef in copyfuncs.c.
While at it, document the formerly secret ability to specify a collation
in ALTER TABLE ALTER COLUMN TYPE, ALTER TYPE ADD ATTRIBUTE, and
ALTER TYPE ALTER ATTRIBUTE TYPE; and correct some misstatements about
what the default collation selection will be when COLLATE is omitted.
BTW, the three-parameter form of format_type() should go away too,
since it just contributes to the confusion in this area; but I'll do
that in a separate patch.
2011-03-10 04:38:52 +01:00
|
|
|
WRITE_NODE_FIELD(collClause);
|
|
|
|
WRITE_OID_FIELD(collOid);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(constraints);
|
2011-08-05 19:24:03 +02:00
|
|
|
WRITE_NODE_FIELD(fdwoptions);
|
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
2013-11-22 01:37:02 +01:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outTypeName(StringInfo str, const TypeName *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("TYPENAME");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(names);
|
2009-07-16 08:33:46 +02:00
|
|
|
WRITE_OID_FIELD(typeOid);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_BOOL_FIELD(setof);
|
|
|
|
WRITE_BOOL_FIELD(pct_type);
|
2006-12-30 22:21:56 +01:00
|
|
|
WRITE_NODE_FIELD(typmods);
|
|
|
|
WRITE_INT_FIELD(typemod);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(arrayBounds);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outTypeCast(StringInfo str, const TypeCast *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("TYPECAST");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
2009-07-16 08:33:46 +02:00
|
|
|
WRITE_NODE_FIELD(typeName);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
2011-02-08 22:04:18 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outCollateClause(StringInfo str, const CollateClause *node)
|
2011-02-08 22:04:18 +01:00
|
|
|
{
|
2011-03-11 22:27:51 +01:00
|
|
|
WRITE_NODE_TYPE("COLLATECLAUSE");
|
2011-02-08 22:04:18 +01:00
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
2011-03-11 22:27:51 +01:00
|
|
|
WRITE_NODE_FIELD(collname);
|
2011-02-08 22:04:18 +01:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outIndexElem(StringInfo str, const IndexElem *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("INDEXELEM");
|
|
|
|
|
|
|
|
WRITE_STRING_FIELD(name);
|
2003-05-28 18:04:02 +02:00
|
|
|
WRITE_NODE_FIELD(expr);
|
Adjust naming of indexes and their columns per recent discussion.
Index expression columns are now named after the FigureColname result for
their expressions, rather than always being "pg_expression_N". Digits are
appended to this name if needed to make the column name unique within the
index. (That happens for regular columns too, thus fixing the old problem
that CREATE INDEX fooi ON foo (f1, f1) fails. Before exclusion indexes
there was no real reason to do such a thing, but now maybe there is.)
Default names for indexes and associated constraints now include the column
names of all their columns, not only the first one as in previous practice.
(Of course, this will be truncated as needed to fit in NAMEDATALEN. Also,
pkey indexes retain the historical behavior of not naming specific columns
at all.)
An example of the results:
regression=# create table foo (f1 int, f2 text,
regression(# exclude (f1 with =, lower(f2) with =));
NOTICE: CREATE TABLE / EXCLUDE will create implicit index "foo_f1_lower_exclusion" for table "foo"
CREATE TABLE
regression=# \d foo_f1_lower_exclusion
Index "public.foo_f1_lower_exclusion"
Column | Type | Definition
--------+---------+------------
f1 | integer | f1
lower | text | lower(f2)
btree, for table "public.foo"
2009-12-23 03:35:25 +01:00
|
|
|
WRITE_STRING_FIELD(indexcolname);
|
2011-02-08 22:04:18 +01:00
|
|
|
WRITE_NODE_FIELD(collation);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(opclass);
|
2007-01-09 03:14:16 +01:00
|
|
|
WRITE_ENUM_FIELD(ordering, SortByDir);
|
|
|
|
WRITE_ENUM_FIELD(nulls_ordering, SortByNulls);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outQuery(StringInfo str, const Query *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("QUERY");
|
|
|
|
|
|
|
|
WRITE_ENUM_FIELD(commandType, CmdType);
|
|
|
|
WRITE_ENUM_FIELD(querySource, QuerySource);
|
2012-03-27 21:14:13 +02:00
|
|
|
/* we intentionally do not print the queryId field */
|
2003-05-02 22:54:36 +02:00
|
|
|
WRITE_BOOL_FIELD(canSetTag);
|
2002-12-12 16:49:42 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Hack to work around missing outfuncs routines for a lot of the
|
|
|
|
* utility-statement node types. (The only one we actually *need* for
|
2005-10-15 04:49:52 +02:00
|
|
|
* rules support is NotifyStmt.) Someday we ought to support 'em all, but
|
|
|
|
* for the meantime do this to avoid getting lots of warnings when running
|
|
|
|
* with debug_print_parse on.
|
2002-12-12 16:49:42 +01:00
|
|
|
*/
|
|
|
|
if (node->utilityStmt)
|
2001-01-07 02:08:48 +01:00
|
|
|
{
|
2002-12-12 16:49:42 +01:00
|
|
|
switch (nodeTag(node->utilityStmt))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_CreateStmt:
|
|
|
|
case T_IndexStmt:
|
|
|
|
case T_NotifyStmt:
|
2003-03-10 04:53:52 +01:00
|
|
|
case T_DeclareCursorStmt:
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(utilityStmt);
|
|
|
|
break;
|
|
|
|
default:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, " :utilityStmt ?");
|
2002-12-12 16:49:42 +01:00
|
|
|
break;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2002-12-12 16:49:42 +01:00
|
|
|
else
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, " :utilityStmt <>");
|
2002-12-12 16:49:42 +01:00
|
|
|
|
|
|
|
WRITE_INT_FIELD(resultRelation);
|
|
|
|
WRITE_BOOL_FIELD(hasAggs);
|
2008-12-28 19:54:01 +01:00
|
|
|
WRITE_BOOL_FIELD(hasWindowFuncs);
|
2016-09-13 19:54:24 +02:00
|
|
|
WRITE_BOOL_FIELD(hasTargetSRFs);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_BOOL_FIELD(hasSubLinks);
|
2008-08-02 23:32:01 +02:00
|
|
|
WRITE_BOOL_FIELD(hasDistinctOn);
|
2008-10-04 23:56:55 +02:00
|
|
|
WRITE_BOOL_FIELD(hasRecursive);
|
2011-02-26 00:56:23 +01:00
|
|
|
WRITE_BOOL_FIELD(hasModifyingCTE);
|
2009-10-28 15:55:47 +01:00
|
|
|
WRITE_BOOL_FIELD(hasForUpdate);
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
WRITE_BOOL_FIELD(hasRowSecurity);
|
2008-10-04 23:56:55 +02:00
|
|
|
WRITE_NODE_FIELD(cteList);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(rtable);
|
|
|
|
WRITE_NODE_FIELD(jointree);
|
|
|
|
WRITE_NODE_FIELD(targetList);
|
2017-04-06 14:33:16 +02:00
|
|
|
WRITE_ENUM_FIELD(override, OverridingKind);
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
WRITE_NODE_FIELD(onConflict);
|
2006-08-12 04:52:06 +02:00
|
|
|
WRITE_NODE_FIELD(returningList);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(groupClause);
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in an earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
WRITE_NODE_FIELD(groupingSets);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(havingQual);
|
2008-12-28 19:54:01 +01:00
|
|
|
WRITE_NODE_FIELD(windowClause);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(distinctClause);
|
|
|
|
WRITE_NODE_FIELD(sortClause);
|
|
|
|
WRITE_NODE_FIELD(limitOffset);
|
|
|
|
WRITE_NODE_FIELD(limitCount);
|
2006-04-30 20:30:40 +02:00
|
|
|
WRITE_NODE_FIELD(rowMarks);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_NODE_FIELD(setOperations);
|
2010-08-07 04:44:09 +02:00
|
|
|
WRITE_NODE_FIELD(constraintDeps);
|
2018-09-18 21:08:28 +02:00
|
|
|
WRITE_NODE_FIELD(withCheckOptions);
|
Change representation of statement lists, and add statement location info.
This patch makes several changes that improve the consistency of
representation of lists of statements. It's always been the case
that the output of parse analysis is a list of Query nodes, whatever
the types of the individual statements in the list. This patch brings
similar consistency to the outputs of raw parsing and planning steps:
* The output of raw parsing is now always a list of RawStmt nodes;
the statement-type-dependent nodes are one level down from that.
* The output of pg_plan_queries() is now always a list of PlannedStmt
nodes, even for utility statements. In the case of a utility statement,
"planning" just consists of wrapping a CMD_UTILITY PlannedStmt around
the utility node. This list representation is now used in Portal and
CachedPlan plan lists, replacing the former convention of intermixing
PlannedStmts with bare utility-statement nodes.
Now, every list of statements has a consistent head-node type depending
on how far along it is in processing. This allows changing many places
that formerly used generic "Node *" pointers to use a more specific
pointer type, thus reducing the number of IsA() tests and casts needed,
as well as improving code clarity.
Also, the post-parse-analysis representation of DECLARE CURSOR is changed
so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained
SELECT remains a child of the DeclareCursorStmt rather than getting flipped
around to be the other way. It's now true for both Query and PlannedStmt
that utilityStmt is non-null if and only if commandType is CMD_UTILITY.
That allows simplifying a lot of places that were testing both fields.
(I think some of those were just defensive programming, but in many places,
it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.)
Because PlannedStmt carries a canSetTag field, we're also able to get rid
of some ad-hoc rules about how to reconstruct canSetTag for a bare utility
statement; specifically, the assumption that a utility is canSetTag if and
only if it's the only one in its list. While I see no near-term need for
relaxing that restriction, it's nice to get rid of the ad-hocery.
The API of ProcessUtility() is changed so that what it's passed is the
wrapper PlannedStmt not just the bare utility statement. This will affect
all users of ProcessUtility_hook, but the changes are pretty trivial; see
the affected contrib modules for examples of the minimum change needed.
(Most compilers should give pointer-type-mismatch warnings for uncorrected
code.)
There's also a change in the API of ExplainOneQuery_hook, to pass through
cursorOptions instead of expecting hook functions to know what to pick.
This is needed because of the DECLARE CURSOR changes, but really should
have been done in 9.6; it's unlikely that any extant hook functions
know about using CURSOR_OPT_PARALLEL_OK.
Finally, teach gram.y to save statement boundary locations in RawStmt
nodes, and pass those through to Query and PlannedStmt nodes. This allows
more intelligent handling of cases where a source query string contains
multiple statements. This patch doesn't actually do anything with the
information, but a follow-on patch will. (Passing this information through
cleanly is the true motivation for these changes; while I think this is all
good cleanup, it's unlikely we'd have bothered without this end goal.)
catversion bump because addition of location fields to struct Query
affects stored rules.
This patch is by me, but it owes a good deal to Fabien Coelho who did
a lot of preliminary work on the problem, and also reviewed the patch.
Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
|
|
|
WRITE_LOCATION_FIELD(stmt_location);
|
|
|
|
WRITE_LOCATION_FIELD(stmt_len);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
2013-07-18 23:10:16 +02:00
|
|
|
static void
|
|
|
|
_outWithCheckOption(StringInfo str, const WithCheckOption *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("WITHCHECKOPTION");
|
|
|
|
|
2015-04-25 02:34:26 +02:00
|
|
|
WRITE_ENUM_FIELD(kind, WCOKind);
|
|
|
|
WRITE_STRING_FIELD(relname);
|
2015-09-15 21:49:31 +02:00
|
|
|
WRITE_STRING_FIELD(polname);
|
2013-07-18 23:10:16 +02:00
|
|
|
WRITE_NODE_FIELD(qual);
|
|
|
|
WRITE_BOOL_FIELD(cascaded);
|
|
|
|
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outSortGroupClause(StringInfo str, const SortGroupClause *node)
|
2002-12-12 16:49:42 +01:00
|
|
|
{
|
2008-08-02 23:32:01 +02:00
|
|
|
WRITE_NODE_TYPE("SORTGROUPCLAUSE");
|
2002-12-12 16:49:42 +01:00
|
|
|
|
|
|
|
WRITE_UINT_FIELD(tleSortGroupRef);
|
2008-08-02 23:32:01 +02:00
|
|
|
WRITE_OID_FIELD(eqop);
|
2002-12-12 16:49:42 +01:00
|
|
|
WRITE_OID_FIELD(sortop);
|
2007-01-09 03:14:16 +01:00
|
|
|
WRITE_BOOL_FIELD(nulls_first);
|
2010-10-31 02:55:20 +01:00
|
|
|
WRITE_BOOL_FIELD(hashable);
|
2002-12-12 16:49:42 +01:00
|
|
|
}
|
|
|
|
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in an earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
static void
|
|
|
|
_outGroupingSet(StringInfo str, const GroupingSet *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("GROUPINGSET");
|
|
|
|
|
|
|
|
WRITE_ENUM_FIELD(kind, GroupingSetKind);
|
|
|
|
WRITE_NODE_FIELD(content);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
2008-12-28 19:54:01 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outWindowClause(StringInfo str, const WindowClause *node)
|
2008-12-28 19:54:01 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("WINDOWCLAUSE");
|
|
|
|
|
|
|
|
WRITE_STRING_FIELD(name);
|
|
|
|
WRITE_STRING_FIELD(refname);
|
|
|
|
WRITE_NODE_FIELD(partitionClause);
|
|
|
|
WRITE_NODE_FIELD(orderClause);
|
2008-12-31 01:08:39 +01:00
|
|
|
WRITE_INT_FIELD(frameOptions);
|
2010-02-12 18:33:21 +01:00
|
|
|
WRITE_NODE_FIELD(startOffset);
|
|
|
|
WRITE_NODE_FIELD(endOffset);
|
Support all SQL:2011 options for window frame clauses.
This patch adds the ability to use "RANGE offset PRECEDING/FOLLOWING"
frame boundaries in window functions. We'd punted on that back in the
original patch to add window functions, because it was not clear how to
do it in a reasonably data-type-extensible fashion. That problem is
resolved here by adding the ability for btree operator classes to provide
an "in_range" support function that defines how to add or subtract the
RANGE offset value. Factoring it this way also allows the operator class
to avoid overflow problems near the ends of the datatype's range, if it
wishes to expend effort on that. (In the committed patch, the integer
opclasses handle that issue, but it did not seem worth the trouble to
avoid overflow failures for datetime types.)
The patch includes in_range support for the integer_ops opfamily
(int2/int4/int8) as well as the standard datetime types. Support for
other numeric types has been requested, but that seems like suitable
material for a follow-on patch.
In addition, the patch adds GROUPS mode which counts the offset in
ORDER-BY peer groups rather than rows, and it adds the frame_exclusion
options specified by SQL:2011. As far as I can see, we are now fully
up to spec on window framing options.
Existing behaviors remain unchanged, except that I changed the errcode
for a couple of existing error reports to meet the SQL spec's expectation
that negative "offset" values should be reported as SQLSTATE 22013.
Internally and in relevant parts of the documentation, we now consistently
use the terminology "offset PRECEDING/FOLLOWING" rather than "value
PRECEDING/FOLLOWING", since the term "value" is confusingly vague.
Oliver Ford, reviewed and whacked around some by me
Discussion: https://postgr.es/m/CAGMVOdu9sivPAxbNN0X+q19Sfv9edEPv=HibOJhB14TJv_RCQg@mail.gmail.com
2018-02-07 06:06:50 +01:00
|
|
|
WRITE_OID_FIELD(startInRangeFunc);
|
|
|
|
WRITE_OID_FIELD(endInRangeFunc);
|
|
|
|
WRITE_OID_FIELD(inRangeColl);
|
|
|
|
WRITE_BOOL_FIELD(inRangeAsc);
|
|
|
|
WRITE_BOOL_FIELD(inRangeNullsFirst);
|
2008-12-28 19:54:01 +01:00
|
|
|
WRITE_UINT_FIELD(winref);
|
|
|
|
WRITE_BOOL_FIELD(copiedOrder);
|
|
|
|
}
|
|
|
|
|
2006-04-30 20:30:40 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outRowMarkClause(StringInfo str, const RowMarkClause *node)
|
2006-04-30 20:30:40 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("ROWMARKCLAUSE");
|
|
|
|
|
|
|
|
WRITE_UINT_FIELD(rti);
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
WRITE_ENUM_FIELD(strength, LockClauseStrength);
|
2014-10-07 22:23:34 +02:00
|
|
|
WRITE_ENUM_FIELD(waitPolicy, LockWaitPolicy);
|
2009-10-28 15:55:47 +01:00
|
|
|
WRITE_BOOL_FIELD(pushedDown);
|
2006-04-30 20:30:40 +02:00
|
|
|
}
|
|
|
|
|
2008-10-04 23:56:55 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outWithClause(StringInfo str, const WithClause *node)
|
2008-10-04 23:56:55 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("WITHCLAUSE");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(ctes);
|
|
|
|
WRITE_BOOL_FIELD(recursive);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * _outCommonTableExpr -
 *	   dump one CTE: its name, query, and analysis-derived bookkeeping.
 */
static void
_outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
{
	WRITE_NODE_TYPE("COMMONTABLEEXPR");

	WRITE_STRING_FIELD(ctename);
	WRITE_NODE_FIELD(aliascolnames);
	/* MATERIALIZED / NOT MATERIALIZED / default, per the user's spelling */
	WRITE_ENUM_FIELD(ctematerialized, CTEMaterialize);
	WRITE_NODE_FIELD(ctequery);
	WRITE_LOCATION_FIELD(location);
	/* remaining fields are filled in during parse analysis / planning */
	WRITE_BOOL_FIELD(cterecursive);
	WRITE_INT_FIELD(cterefcount);
	WRITE_NODE_FIELD(ctecolnames);
	WRITE_NODE_FIELD(ctecoltypes);
	WRITE_NODE_FIELD(ctecoltypmods);
	WRITE_NODE_FIELD(ctecolcollations);
}
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
/*
 * _outSetOperationStmt -
 *	   dump a SetOperationStmt (UNION/INTERSECT/EXCEPT tree node).
 */
static void
_outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
{
	WRITE_NODE_TYPE("SETOPERATIONSTMT");

	WRITE_ENUM_FIELD(op, SetOperation);
	WRITE_BOOL_FIELD(all);		/* true for UNION ALL etc. */
	WRITE_NODE_FIELD(larg);
	WRITE_NODE_FIELD(rarg);
	/* per-output-column result type info */
	WRITE_NODE_FIELD(colTypes);
	WRITE_NODE_FIELD(colTypmods);
	WRITE_NODE_FIELD(colCollations);
	WRITE_NODE_FIELD(groupClauses);
}
|
|
|
|
|
|
|
|
/*
 * _outRangeTblEntry -
 *	   dump a RangeTblEntry.  Only the fields relevant to the RTE's kind
 *	   are emitted for the kind-specific part; the field order must stay
 *	   in sync with the corresponding reader (see file header comments).
 */
static void
_outRangeTblEntry(StringInfo str, const RangeTblEntry *node)
{
	WRITE_NODE_TYPE("RTE");

	/* put alias + eref first to make dump more legible */
	WRITE_NODE_FIELD(alias);
	WRITE_NODE_FIELD(eref);
	WRITE_ENUM_FIELD(rtekind, RTEKind);

	switch (node->rtekind)
	{
		case RTE_RELATION:
			WRITE_OID_FIELD(relid);
			WRITE_CHAR_FIELD(relkind);
			WRITE_INT_FIELD(rellockmode);	/* lock level the query needs */
			WRITE_NODE_FIELD(tablesample);
			break;
		case RTE_SUBQUERY:
			WRITE_NODE_FIELD(subquery);
			WRITE_BOOL_FIELD(security_barrier);
			break;
		case RTE_JOIN:
			WRITE_ENUM_FIELD(jointype, JoinType);
			WRITE_NODE_FIELD(joinaliasvars);
			break;
		case RTE_FUNCTION:
			WRITE_NODE_FIELD(functions);	/* list of RangeTblFunction */
			WRITE_BOOL_FIELD(funcordinality);
			break;
		case RTE_TABLEFUNC:
			WRITE_NODE_FIELD(tablefunc);
			break;
		case RTE_VALUES:
			WRITE_NODE_FIELD(values_lists);
			WRITE_NODE_FIELD(coltypes);
			WRITE_NODE_FIELD(coltypmods);
			WRITE_NODE_FIELD(colcollations);
			break;
		case RTE_CTE:
			WRITE_STRING_FIELD(ctename);
			WRITE_UINT_FIELD(ctelevelsup);
			WRITE_BOOL_FIELD(self_reference);
			WRITE_NODE_FIELD(coltypes);
			WRITE_NODE_FIELD(coltypmods);
			WRITE_NODE_FIELD(colcollations);
			break;
		case RTE_NAMEDTUPLESTORE:
			WRITE_STRING_FIELD(enrname);
			WRITE_FLOAT_FIELD(enrtuples, "%.0f");
			WRITE_OID_FIELD(relid);
			WRITE_NODE_FIELD(coltypes);
			WRITE_NODE_FIELD(coltypmods);
			WRITE_NODE_FIELD(colcollations);
			break;
		case RTE_RESULT:
			/* no extra fields */
			break;
		default:
			elog(ERROR, "unrecognized RTE kind: %d", (int) node->rtekind);
			break;
	}

	/* fields common to all RTE kinds */
	WRITE_BOOL_FIELD(lateral);
	WRITE_BOOL_FIELD(inh);
	WRITE_BOOL_FIELD(inFromCl);
	/* permissions-checking info */
	WRITE_UINT_FIELD(requiredPerms);
	WRITE_OID_FIELD(checkAsUser);
	WRITE_BITMAPSET_FIELD(selectedCols);
	WRITE_BITMAPSET_FIELD(insertedCols);
	WRITE_BITMAPSET_FIELD(updatedCols);
	WRITE_BITMAPSET_FIELD(extraUpdatedCols);
	WRITE_NODE_FIELD(securityQuals);
}
|
|
|
|
|
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
2013-11-22 01:37:02 +01:00
|
|
|
/*
 * _outRangeTblFunction -
 *	   dump one function within an RTE_FUNCTION rangetable entry.
 */
static void
_outRangeTblFunction(StringInfo str, const RangeTblFunction *node)
{
	WRITE_NODE_TYPE("RANGETBLFUNCTION");

	WRITE_NODE_FIELD(funcexpr);
	WRITE_INT_FIELD(funccolcount);	/* number of columns the function yields */
	/* column definition list info, if any */
	WRITE_NODE_FIELD(funccolnames);
	WRITE_NODE_FIELD(funccoltypes);
	WRITE_NODE_FIELD(funccoltypmods);
	WRITE_NODE_FIELD(funccolcollations);
	WRITE_BITMAPSET_FIELD(funcparams);
}
|
|
|
|
|
Redesign tablesample method API, and do extensive code review.
The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a TSM. (Raw inserts into system catalogs are not an acceptable
thing to do, because we can't undo them during DROP EXTENSION, nor will
pg_upgrade behave sanely.) Instead adopt an API more like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.
Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which is undesirable even if it weren't outright
unsafe), and discourage sampling methods from looking at invisible tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.
Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.
Back-patch to 9.5 so that we don't have to support the original API
in production.
2015-07-25 20:39:00 +02:00
|
|
|
/*
 * _outTableSampleClause -
 *	   dump a TableSampleClause (parse-analyzed TABLESAMPLE spec).
 */
static void
_outTableSampleClause(StringInfo str, const TableSampleClause *node)
{
	WRITE_NODE_TYPE("TABLESAMPLECLAUSE");

	WRITE_OID_FIELD(tsmhandler);	/* OID of the sampling method's handler */
	WRITE_NODE_FIELD(args);
	WRITE_NODE_FIELD(repeatable);	/* REPEATABLE expression, or NULL */
}
|
|
|
|
|
1997-12-23 20:50:54 +01:00
|
|
|
/*
 * _outAExpr -
 *	   dump an A_Expr (raw-parse-tree expression node).  A label naming the
 *	   node's kind is emitted first, then the operator name list where the
 *	   kind carries one, then the operand subtrees.
 */
static void
_outAExpr(StringInfo str, const A_Expr *node)
{
	WRITE_NODE_TYPE("AEXPR");

	switch (node->kind)
	{
		case AEXPR_OP:
			/* ordinary operator: no keyword label, just the name */
			appendStringInfoChar(str, ' ');
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_OP_ANY:
			appendStringInfoChar(str, ' ');
			WRITE_NODE_FIELD(name);
			appendStringInfoString(str, " ANY ");
			break;
		case AEXPR_OP_ALL:
			appendStringInfoChar(str, ' ');
			WRITE_NODE_FIELD(name);
			appendStringInfoString(str, " ALL ");
			break;
		case AEXPR_DISTINCT:
			appendStringInfoString(str, " DISTINCT ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_NOT_DISTINCT:
			appendStringInfoString(str, " NOT_DISTINCT ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_NULLIF:
			appendStringInfoString(str, " NULLIF ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_OF:
			appendStringInfoString(str, " OF ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_IN:
			appendStringInfoString(str, " IN ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_LIKE:
			appendStringInfoString(str, " LIKE ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_ILIKE:
			appendStringInfoString(str, " ILIKE ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_SIMILAR:
			appendStringInfoString(str, " SIMILAR ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_BETWEEN:
			appendStringInfoString(str, " BETWEEN ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_NOT_BETWEEN:
			appendStringInfoString(str, " NOT_BETWEEN ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_BETWEEN_SYM:
			appendStringInfoString(str, " BETWEEN_SYM ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_NOT_BETWEEN_SYM:
			appendStringInfoString(str, " NOT_BETWEEN_SYM ");
			WRITE_NODE_FIELD(name);
			break;
		case AEXPR_PAREN:
			/* no operator name for explicit parentheses */
			appendStringInfoString(str, " PAREN");
			break;
		default:
			appendStringInfoString(str, " ??");
			break;
	}

	WRITE_NODE_FIELD(lexpr);
	WRITE_NODE_FIELD(rexpr);
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
 * _outValue -
 *	   emit a Value node's payload in a form the reader can parse back.
 *	   Note this prints no node-type label of its own; the surrounding
 *	   context (e.g. A_Const) determines how it is interpreted.
 */
static void
_outValue(StringInfo str, const Value *value)
{
	switch (value->type)
	{
		case T_Integer:
			appendStringInfo(str, "%d", value->val.ival);
			break;
		case T_Float:

			/*
			 * We assume the value is a valid numeric literal and so does not
			 * need quoting.
			 */
			appendStringInfoString(str, value->val.str);
			break;
		case T_String:

			/*
			 * We use outToken to provide escaping of the string's content,
			 * but we don't want it to do anything with an empty string.
			 */
			appendStringInfoChar(str, '"');
			if (value->val.str[0] != '\0')
				outToken(str, value->val.str);
			appendStringInfoChar(str, '"');
			break;
		case T_BitString:
			/* internal representation already has leading 'b' */
			appendStringInfoString(str, value->val.str);
			break;
		case T_Null:
			/* this is seen only within A_Const, not in transformed trees */
			appendStringInfoString(str, "NULL");
			break;
		default:
			elog(ERROR, "unrecognized node type: %d", (int) value->type);
			break;
	}
}
|
|
|
|
|
1999-02-23 09:01:47 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outColumnRef(StringInfo str, const ColumnRef *node)
|
1999-02-23 09:01:47 +01:00
|
|
|
{
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_NODE_TYPE("COLUMNREF");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(fields);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-03-21 17:02:16 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outParamRef(StringInfo str, const ParamRef *node)
|
2002-03-21 17:02:16 +01:00
|
|
|
{
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_NODE_TYPE("PARAMREF");
|
|
|
|
|
|
|
|
WRITE_INT_FIELD(number);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2002-03-21 17:02:16 +01:00
|
|
|
}
|
|
|
|
|
2018-09-16 19:02:47 +02:00
|
|
|
/*
|
|
|
|
* Node types found in raw parse trees (supported for debug purposes)
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outRawStmt(StringInfo str, const RawStmt *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("RAWSTMT");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(stmt);
|
|
|
|
WRITE_LOCATION_FIELD(stmt_location);
|
|
|
|
WRITE_INT_FIELD(stmt_len);
|
|
|
|
}
|
|
|
|
|
1997-12-23 20:50:54 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outAConst(StringInfo str, const A_Const *node)
|
1997-12-23 20:50:54 +01:00
|
|
|
{
|
2004-06-09 21:08:20 +02:00
|
|
|
WRITE_NODE_TYPE("A_CONST");
|
2002-11-25 19:12:12 +01:00
|
|
|
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, " :val ");
|
1997-12-23 20:50:54 +01:00
|
|
|
_outValue(str, &(node->val));
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
1997-12-23 20:50:54 +01:00
|
|
|
}
|
|
|
|
|
2008-08-30 03:39:14 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outA_Star(StringInfo str, const A_Star *node)
|
2008-08-30 03:39:14 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("A_STAR");
|
|
|
|
}
|
|
|
|
|
2002-03-21 17:02:16 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outA_Indices(StringInfo str, const A_Indices *node)
|
2002-03-21 17:02:16 +01:00
|
|
|
{
|
2004-06-09 21:08:20 +02:00
|
|
|
WRITE_NODE_TYPE("A_INDICES");
|
|
|
|
|
2015-12-23 03:05:16 +01:00
|
|
|
WRITE_BOOL_FIELD(is_slice);
|
2004-06-09 21:08:20 +02:00
|
|
|
WRITE_NODE_FIELD(lidx);
|
|
|
|
WRITE_NODE_FIELD(uidx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outA_Indirection(StringInfo str, const A_Indirection *node)
|
2004-06-09 21:08:20 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("A_INDIRECTION");
|
2002-11-25 19:12:12 +01:00
|
|
|
|
|
|
|
WRITE_NODE_FIELD(arg);
|
|
|
|
WRITE_NODE_FIELD(indirection);
|
2002-03-21 17:02:16 +01:00
|
|
|
}
|
|
|
|
|
2008-03-20 22:42:48 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outA_ArrayExpr(StringInfo str, const A_ArrayExpr *node)
|
2008-03-20 22:42:48 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("A_ARRAYEXPR");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(elements);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2008-03-20 22:42:48 +01:00
|
|
|
}
|
|
|
|
|
2004-06-09 21:08:20 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outResTarget(StringInfo str, const ResTarget *node)
|
2004-06-09 21:08:20 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("RESTARGET");
|
|
|
|
|
|
|
|
WRITE_STRING_FIELD(name);
|
|
|
|
WRITE_NODE_FIELD(indirection);
|
|
|
|
WRITE_NODE_FIELD(val);
|
2008-08-29 01:09:48 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2004-06-09 21:08:20 +02:00
|
|
|
}
|
|
|
|
|
Implement UPDATE tab SET (col1,col2,...) = (SELECT ...), ...
This SQL-standard feature allows a sub-SELECT yielding multiple columns
(but only one row) to be used to compute the new values of several columns
to be updated. While the same results can be had with an independent
sub-SELECT per column, such a workaround can require a great deal of
duplicated computation.
The standard actually says that the source for a multi-column assignment
could be any row-valued expression. The implementation used here is
tightly tied to our existing sub-SELECT support and can't handle other
cases; the Bison grammar would have some issues with them too. However,
I don't feel too bad about this since other cases can be converted into
sub-SELECTs. For instance, "SET (a,b,c) = row_valued_function(x)" could
be written "SET (a,b,c) = (SELECT * FROM row_valued_function(x))".
2014-06-18 19:22:25 +02:00
|
|
|
static void
|
|
|
|
_outMultiAssignRef(StringInfo str, const MultiAssignRef *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("MULTIASSIGNREF");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(source);
|
|
|
|
WRITE_INT_FIELD(colno);
|
|
|
|
WRITE_INT_FIELD(ncolumns);
|
|
|
|
}
|
|
|
|
|
2008-07-17 18:02:12 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outSortBy(StringInfo str, const SortBy *node)
|
2008-07-17 18:02:12 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("SORTBY");
|
|
|
|
|
2008-09-01 22:42:46 +02:00
|
|
|
WRITE_NODE_FIELD(node);
|
2008-07-17 18:02:12 +02:00
|
|
|
WRITE_ENUM_FIELD(sortby_dir, SortByDir);
|
|
|
|
WRITE_ENUM_FIELD(sortby_nulls, SortByNulls);
|
|
|
|
WRITE_NODE_FIELD(useOp);
|
2008-09-01 22:42:46 +02:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
2008-07-17 18:02:12 +02:00
|
|
|
}
|
|
|
|
|
2008-12-28 19:54:01 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outWindowDef(StringInfo str, const WindowDef *node)
|
2008-12-28 19:54:01 +01:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("WINDOWDEF");
|
|
|
|
|
|
|
|
WRITE_STRING_FIELD(name);
|
|
|
|
WRITE_STRING_FIELD(refname);
|
|
|
|
WRITE_NODE_FIELD(partitionClause);
|
|
|
|
WRITE_NODE_FIELD(orderClause);
|
2008-12-31 01:08:39 +01:00
|
|
|
WRITE_INT_FIELD(frameOptions);
|
2010-02-12 18:33:21 +01:00
|
|
|
WRITE_NODE_FIELD(startOffset);
|
|
|
|
WRITE_NODE_FIELD(endOffset);
|
2008-12-28 19:54:01 +01:00
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
2008-10-04 23:56:55 +02:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outRangeSubselect(StringInfo str, const RangeSubselect *node)
|
2008-10-04 23:56:55 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("RANGESUBSELECT");
|
|
|
|
|
2012-08-08 01:02:54 +02:00
|
|
|
WRITE_BOOL_FIELD(lateral);
|
2008-10-04 23:56:55 +02:00
|
|
|
WRITE_NODE_FIELD(subquery);
|
|
|
|
WRITE_NODE_FIELD(alias);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outRangeFunction(StringInfo str, const RangeFunction *node)
|
2008-10-04 23:56:55 +02:00
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("RANGEFUNCTION");
|
|
|
|
|
2012-08-08 01:02:54 +02:00
|
|
|
WRITE_BOOL_FIELD(lateral);
|
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
2013-11-22 01:37:02 +01:00
|
|
|
WRITE_BOOL_FIELD(ordinality);
|
2013-12-10 15:34:37 +01:00
|
|
|
WRITE_BOOL_FIELD(is_rowsfrom);
|
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
2013-11-22 01:37:02 +01:00
|
|
|
WRITE_NODE_FIELD(functions);
|
2008-10-04 23:56:55 +02:00
|
|
|
WRITE_NODE_FIELD(alias);
|
|
|
|
WRITE_NODE_FIELD(coldeflist);
|
|
|
|
}
|
|
|
|
|
Redesign tablesample method API, and do extensive code review.
The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a TSM. (Raw inserts into system catalogs are not an acceptable
thing to do, because we can't undo them during DROP EXTENSION, nor will
pg_upgrade behave sanely.) Instead adopt an API more like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.
Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which is undesirable even if it weren't outright
unsafe), and discourage sampling methods from looking at invisible tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.
Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.
Back-patch to 9.5 so that we don't have to support the original API
in production.
2015-07-25 20:39:00 +02:00
|
|
|
static void
|
|
|
|
_outRangeTableSample(StringInfo str, const RangeTableSample *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("RANGETABLESAMPLE");
|
|
|
|
|
|
|
|
WRITE_NODE_FIELD(relation);
|
|
|
|
WRITE_NODE_FIELD(method);
|
|
|
|
WRITE_NODE_FIELD(args);
|
|
|
|
WRITE_NODE_FIELD(repeatable);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
2017-03-08 16:39:37 +01:00
|
|
|
static void
|
|
|
|
_outRangeTableFunc(StringInfo str, const RangeTableFunc *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("RANGETABLEFUNC");
|
|
|
|
|
|
|
|
WRITE_BOOL_FIELD(lateral);
|
|
|
|
WRITE_NODE_FIELD(docexpr);
|
|
|
|
WRITE_NODE_FIELD(rowexpr);
|
|
|
|
WRITE_NODE_FIELD(namespaces);
|
|
|
|
WRITE_NODE_FIELD(columns);
|
|
|
|
WRITE_NODE_FIELD(alias);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_outRangeTableFuncCol(StringInfo str, const RangeTableFuncCol *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("RANGETABLEFUNCCOL");
|
|
|
|
|
|
|
|
WRITE_STRING_FIELD(colname);
|
|
|
|
WRITE_NODE_FIELD(typeName);
|
|
|
|
WRITE_BOOL_FIELD(for_ordinality);
|
|
|
|
WRITE_BOOL_FIELD(is_not_null);
|
|
|
|
WRITE_NODE_FIELD(colexpr);
|
|
|
|
WRITE_NODE_FIELD(coldefexpr);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
1998-12-04 16:34:49 +01:00
|
|
|
static void
|
2011-12-07 20:46:56 +01:00
|
|
|
_outConstraint(StringInfo str, const Constraint *node)
|
1998-12-04 16:34:49 +01:00
|
|
|
{
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_NODE_TYPE("CONSTRAINT");
|
|
|
|
|
2009-07-30 04:45:38 +02:00
|
|
|
WRITE_STRING_FIELD(conname);
|
|
|
|
WRITE_BOOL_FIELD(deferrable);
|
|
|
|
WRITE_BOOL_FIELD(initdeferred);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
1998-12-04 16:34:49 +01:00
|
|
|
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, " :contype ");
|
1998-12-04 16:34:49 +01:00
|
|
|
switch (node->contype)
|
|
|
|
{
|
2009-07-30 04:45:38 +02:00
|
|
|
case CONSTR_NULL:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "NULL");
|
2009-07-30 04:45:38 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
case CONSTR_NOTNULL:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "NOT_NULL");
|
2009-07-30 04:45:38 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
case CONSTR_DEFAULT:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "DEFAULT");
|
2009-07-30 04:45:38 +02:00
|
|
|
WRITE_NODE_FIELD(raw_expr);
|
|
|
|
WRITE_STRING_FIELD(cooked_expr);
|
|
|
|
break;
|
|
|
|
|
2017-04-06 14:33:16 +02:00
|
|
|
case CONSTR_IDENTITY:
|
|
|
|
appendStringInfoString(str, "IDENTITY");
|
|
|
|
WRITE_NODE_FIELD(raw_expr);
|
|
|
|
WRITE_STRING_FIELD(cooked_expr);
|
|
|
|
WRITE_CHAR_FIELD(generated_when);
|
|
|
|
break;
|
|
|
|
|
2019-03-30 08:13:09 +01:00
|
|
|
case CONSTR_GENERATED:
|
|
|
|
appendStringInfoString(str, "GENERATED");
|
|
|
|
WRITE_NODE_FIELD(raw_expr);
|
|
|
|
WRITE_STRING_FIELD(cooked_expr);
|
|
|
|
WRITE_CHAR_FIELD(generated_when);
|
|
|
|
break;
|
|
|
|
|
2009-07-30 04:45:38 +02:00
|
|
|
case CONSTR_CHECK:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "CHECK");
|
2012-04-21 04:46:20 +02:00
|
|
|
WRITE_BOOL_FIELD(is_no_inherit);
|
2009-07-30 04:45:38 +02:00
|
|
|
WRITE_NODE_FIELD(raw_expr);
|
|
|
|
WRITE_STRING_FIELD(cooked_expr);
|
|
|
|
break;
|
|
|
|
|
1998-12-04 16:34:49 +01:00
|
|
|
case CONSTR_PRIMARY:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "PRIMARY_KEY");
|
2002-11-25 19:12:12 +01:00
|
|
|
WRITE_NODE_FIELD(keys);
|
2018-04-07 22:00:39 +02:00
|
|
|
WRITE_NODE_FIELD(including);
|
2006-07-02 04:23:23 +02:00
|
|
|
WRITE_NODE_FIELD(options);
|
2011-01-25 21:42:03 +01:00
|
|
|
WRITE_STRING_FIELD(indexname);
|
2004-08-02 06:28:29 +02:00
|
|
|
WRITE_STRING_FIELD(indexspace);
|
Fix tablespace inheritance for partitioned rels
Commit ca4103025dfe left a few loose ends. The most important one
(broken pg_dump output) is already fixed by virtue of commit
3b23552ad8bb, but some things remained:
* When ALTER TABLE rewrites tables, the indexes must remain in the
tablespace they were originally in. This didn't work because
index recreation during ALTER TABLE runs manufactured SQL (yuck),
which runs afoul of default_tablespace in competition with the parent
relation tablespace. To fix, reset default_tablespace to the empty
string temporarily, and add the TABLESPACE clause as appropriate.
* Setting a partitioned rel's tablespace to the database default is
confusing; if it worked, it would direct the partitions to that
tablespace regardless of default_tablespace. But in reality it does
not work, and making it work is a larger project. Therefore, throw
an error when this condition is detected, to alert the unwary.
Add some docs and tests, too.
Author: Álvaro Herrera
Discussion: https://postgr.es/m/CAKJS1f_1c260nOt_vBJ067AZ3JXptXVRohDVMLEBmudX1YEx-A@mail.gmail.com
2019-04-25 16:20:23 +02:00
|
|
|
WRITE_BOOL_FIELD(reset_default_tblspc);
|
2009-12-07 06:22:23 +01:00
|
|
|
/* access_method and where_clause not currently used */
|
2004-08-02 06:28:29 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
case CONSTR_UNIQUE:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "UNIQUE");
|
2004-08-02 06:28:29 +02:00
|
|
|
WRITE_NODE_FIELD(keys);
|
2018-04-07 22:00:39 +02:00
|
|
|
WRITE_NODE_FIELD(including);
|
2006-07-02 04:23:23 +02:00
|
|
|
WRITE_NODE_FIELD(options);
|
2011-01-25 21:42:03 +01:00
|
|
|
WRITE_STRING_FIELD(indexname);
|
2004-08-02 06:28:29 +02:00
|
|
|
WRITE_STRING_FIELD(indexspace);
|
Fix tablespace inheritance for partitioned rels
Commit ca4103025dfe left a few loose ends. The most important one
(broken pg_dump output) is already fixed by virtue of commit
3b23552ad8bb, but some things remained:
* When ALTER TABLE rewrites tables, the indexes must remain in the
tablespace they were originally in. This didn't work because
index recreation during ALTER TABLE runs manufactured SQL (yuck),
which runs afoul of default_tablespace in competition with the parent
relation tablespace. To fix, reset default_tablespace to the empty
string temporarily, and add the TABLESPACE clause as appropriate.
* Setting a partitioned rel's tablespace to the database default is
confusing; if it worked, it would direct the partitions to that
tablespace regardless of default_tablespace. But in reality it does
not work, and making it work is a larger project. Therefore, throw
an error when this condition is detected, to alert the unwary.
Add some docs and tests, too.
Author: Álvaro Herrera
Discussion: https://postgr.es/m/CAKJS1f_1c260nOt_vBJ067AZ3JXptXVRohDVMLEBmudX1YEx-A@mail.gmail.com
2019-04-25 16:20:23 +02:00
|
|
|
WRITE_BOOL_FIELD(reset_default_tblspc);
|
2009-12-07 06:22:23 +01:00
|
|
|
/* access_method and where_clause not currently used */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CONSTR_EXCLUSION:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "EXCLUSION");
|
2009-12-07 06:22:23 +01:00
|
|
|
WRITE_NODE_FIELD(exclusions);
|
2018-04-07 22:00:39 +02:00
|
|
|
WRITE_NODE_FIELD(including);
|
2009-12-07 06:22:23 +01:00
|
|
|
WRITE_NODE_FIELD(options);
|
2011-01-25 21:42:03 +01:00
|
|
|
WRITE_STRING_FIELD(indexname);
|
2009-12-07 06:22:23 +01:00
|
|
|
WRITE_STRING_FIELD(indexspace);
|
Fix tablespace inheritance for partitioned rels
Commit ca4103025dfe left a few loose ends. The most important one
(broken pg_dump output) is already fixed by virtue of commit
3b23552ad8bb, but some things remained:
* When ALTER TABLE rewrites tables, the indexes must remain in the
tablespace they were originally in. This didn't work because
index recreation during ALTER TABLE runs manufactured SQL (yuck),
which runs afoul of default_tablespace in competition with the parent
relation tablespace. To fix, reset default_tablespace to the empty
string temporarily, and add the TABLESPACE clause as appropriate.
* Setting a partitioned rel's tablespace to the database default is
confusing; if it worked, it would direct the partitions to that
tablespace regardless of default_tablespace. But in reality it does
not work, and making it work is a larger project. Therefore, throw
an error when this condition is detected, to alert the unwary.
Add some docs and tests, too.
Author: Álvaro Herrera
Discussion: https://postgr.es/m/CAKJS1f_1c260nOt_vBJ067AZ3JXptXVRohDVMLEBmudX1YEx-A@mail.gmail.com
2019-04-25 16:20:23 +02:00
|
|
|
WRITE_BOOL_FIELD(reset_default_tblspc);
|
2009-12-07 06:22:23 +01:00
|
|
|
WRITE_STRING_FIELD(access_method);
|
|
|
|
WRITE_NODE_FIELD(where_clause);
|
1998-12-04 16:34:49 +01:00
|
|
|
break;
|
|
|
|
|
2009-07-30 04:45:38 +02:00
|
|
|
case CONSTR_FOREIGN:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "FOREIGN_KEY");
|
2009-07-30 04:45:38 +02:00
|
|
|
WRITE_NODE_FIELD(pktable);
|
|
|
|
WRITE_NODE_FIELD(fk_attrs);
|
|
|
|
WRITE_NODE_FIELD(pk_attrs);
|
|
|
|
WRITE_CHAR_FIELD(fk_matchtype);
|
|
|
|
WRITE_CHAR_FIELD(fk_upd_action);
|
|
|
|
WRITE_CHAR_FIELD(fk_del_action);
|
ALTER TABLE: skip FK validation when it's safe to do so
We already skip rewriting the table in these cases, but we still force a
whole table scan to validate the data. This can be skipped, and thus
we can make the whole ALTER TABLE operation just do some catalog touches
instead of scanning the table, when these two conditions hold:
(a) Old and new pg_constraint.conpfeqop match exactly. This is actually
stronger than needed; we could loosen things by way of operator
families, but it'd require a lot more effort.
(b) The functions, if any, implementing a cast from the foreign type to
the primary opcintype are the same. For this purpose, we can consider a
binary coercion equivalent to an exact type match. When the opcintype
is polymorphic, require that the old and new foreign types match
exactly. (Since ri_triggers.c does use the executor, the stronger check
for polymorphic types is no mere future-proofing. However, no core type
exercises its necessity.)
Author: Noah Misch
Committer's note: catalog version bumped due to change of the Constraint
node. I can't actually find any way to have such a node in a stored
rule, but given that we have "out" support for them, better be safe.
2012-02-27 22:28:00 +01:00
|
|
|
WRITE_NODE_FIELD(old_conpfeqop);
|
Avoid repeated name lookups during table and index DDL.
If the name lookups come to different conclusions due to concurrent
activity, we might perform some parts of the DDL on a different table
than other parts. At least in the case of CREATE INDEX, this can be
used to cause the permissions checks to be performed against a
different table than the index creation, allowing for a privilege
escalation attack.
This changes the calling convention for DefineIndex, CreateTrigger,
transformIndexStmt, transformAlterTableStmt, CheckIndexCompatible
(in 9.2 and newer), and AlterTable (in 9.1 and older). In addition,
CheckRelationOwnership is removed in 9.2 and newer and the calling
convention is changed in older branches. A field has also been added
to the Constraint node (FkConstraint in 8.4). Third-party code calling
these functions or using the Constraint node will require updating.
Report by Andres Freund. Patch by Robert Haas and Andres Freund,
reviewed by Tom Lane.
Security: CVE-2014-0062
2014-02-17 15:33:31 +01:00
|
|
|
WRITE_OID_FIELD(old_pktable_oid);
|
2009-07-30 04:45:38 +02:00
|
|
|
WRITE_BOOL_FIELD(skip_validation);
|
2011-03-23 00:10:35 +01:00
|
|
|
WRITE_BOOL_FIELD(initially_valid);
|
1998-12-04 16:34:49 +01:00
|
|
|
break;
|
|
|
|
|
2009-07-30 04:45:38 +02:00
|
|
|
case CONSTR_ATTR_DEFERRABLE:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "ATTR_DEFERRABLE");
|
1998-12-04 16:34:49 +01:00
|
|
|
break;
|
|
|
|
|
2009-07-30 04:45:38 +02:00
|
|
|
case CONSTR_ATTR_NOT_DEFERRABLE:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "ATTR_NOT_DEFERRABLE");
|
1998-12-04 16:34:49 +01:00
|
|
|
break;
|
|
|
|
|
2009-07-30 04:45:38 +02:00
|
|
|
case CONSTR_ATTR_DEFERRED:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "ATTR_DEFERRED");
|
1998-12-04 16:34:49 +01:00
|
|
|
break;
|
|
|
|
|
2009-07-30 04:45:38 +02:00
|
|
|
case CONSTR_ATTR_IMMEDIATE:
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "ATTR_IMMEDIATE");
|
2009-07-30 04:45:38 +02:00
|
|
|
break;
|
2002-11-25 19:12:12 +01:00
|
|
|
|
2009-07-30 04:45:38 +02:00
|
|
|
default:
|
|
|
|
appendStringInfo(str, "<unrecognized_constraint %d>",
|
|
|
|
(int) node->contype);
|
|
|
|
break;
|
|
|
|
}
|
2001-10-25 16:08:11 +02:00
|
|
|
}
|
|
|
|
|
2016-06-18 21:22:34 +02:00
|
|
|
static void
|
|
|
|
_outForeignKeyCacheInfo(StringInfo str, const ForeignKeyCacheInfo *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("FOREIGNKEYCACHEINFO");
|
|
|
|
|
Correct attach/detach logic for FKs in partitions
There was no code to handle foreign key constraints on partitioned
tables in the case of ALTER TABLE DETACH; and if you happened to ATTACH
a partition that already had an equivalent constraint, that one was
ignored and a new constraint was created. Adding this to the fact that
foreign key cloning reuses the constraint name on the partition instead
of generating a new name (as it probably should, to cater to SQL
standard rules about constraint naming within schemas), the result was a
pretty poor user experience -- the most visible failure was that just
detaching a partition and re-attaching it failed with an error such as
ERROR: duplicate key value violates unique constraint "pg_constraint_conrelid_contypid_conname_index"
DETAIL: Key (conrelid, contypid, conname)=(26702, 0, test_result_asset_id_fkey) already exists.
because it would try to create an identically-named constraint in the
partition. To make matters worse, if you tried to drop the constraint
in the now-independent partition, that would fail because the constraint
was still seen as dependent on the constraint in its former parent
partitioned table:
ERROR: cannot drop inherited constraint "test_result_asset_id_fkey" of relation "test_result_cbsystem_0001_0050_monthly_2018_09"
This fix attacks the problem from two angles: first, when the partition
is detached, the constraint is also marked as independent, so the drop
now works. Second, when the partition is re-attached, we scan existing
constraints searching for one matching the FK in the parent, and if one
exists, we link that one to the parent constraint. So we don't end up
with a duplicate -- and better yet, we don't need to scan the referenced
table to verify that the constraint holds.
To implement this I made a small change to previously planner-only
struct ForeignKeyCacheInfo to contain the constraint OID; also relcache
now maintains the list of FKs for partitioned tables too.
Backpatch to 11.
Reported-by: Michael Vitale (bug #15425)
Discussion: https://postgr.es/m/15425-2dbc9d2aa999f816@postgresql.org
2018-10-12 17:36:26 +02:00
|
|
|
WRITE_OID_FIELD(conoid);
|
2016-06-18 21:22:34 +02:00
|
|
|
WRITE_OID_FIELD(conrelid);
|
|
|
|
WRITE_OID_FIELD(confrelid);
|
|
|
|
WRITE_INT_FIELD(nkeys);
|
2018-12-22 06:53:37 +01:00
|
|
|
WRITE_ATTRNUMBER_ARRAY(conkey, node->nkeys);
|
|
|
|
WRITE_ATTRNUMBER_ARRAY(confkey, node->nkeys);
|
|
|
|
WRITE_OID_ARRAY(conpfeqop, node->nkeys);
|
2016-06-18 21:22:34 +02:00
|
|
|
}
|
|
|
|
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
static void
|
|
|
|
_outPartitionElem(StringInfo str, const PartitionElem *node)
|
|
|
|
{
|
|
|
|
WRITE_NODE_TYPE("PARTITIONELEM");
|
|
|
|
|
|
|
|
WRITE_STRING_FIELD(name);
|
|
|
|
WRITE_NODE_FIELD(expr);
|
|
|
|
WRITE_NODE_FIELD(collation);
|
|
|
|
WRITE_NODE_FIELD(opclass);
|
|
|
|
WRITE_LOCATION_FIELD(location);
|
|
|
|
}
|
|
|
|
|
Code review focused on new node types added by partitioning support.
Fix failure to check that we got a plain Const from const-simplification of
a coercion request. This is the cause of bug #14666 from Tian Bing: there
is an int4 to money cast, but it's only stable not immutable (because of
dependence on lc_monetary), resulting in a FuncExpr that the code was
miserably unequipped to deal with, or indeed even to notice that it was
failing to deal with. Add test cases around this coercion behavior.
In view of the above, sprinkle the code liberally with castNode() macros,
in hope of catching the next such bug a bit sooner. Also, change some
functions that were randomly declared to take Node* to take more specific
pointer types. And change some struct fields that were declared Node*
but could be given more specific types, allowing removal of assorted
explicit casts.
Place PARTITION_MAX_KEYS check a bit closer to the code it's protecting.
Likewise check only-one-key-for-list-partitioning restriction in a less
random place.
Avoid not-per-project-style usages like !strcmp(...).
Fix assorted failures to avoid scribbling on the input of parse
transformation. I'm not sure how necessary this is, but it's entirely
silly for these functions to be expending cycles to avoid that and not
getting it right.
Add guards against partitioning on system columns.
Put backend/nodes/ support code into an order that matches handling
of these node types elsewhere.
Annotate the fact that somebody added location fields to PartitionBoundSpec
and PartitionRangeDatum but forgot to handle them in
outfuncs.c/readfuncs.c. This is fairly harmless for production purposes
(since readfuncs.c would just substitute -1 anyway) but it's still bogus.
It's not worth forcing a post-beta1 initdb just to fix this, but if we
have another reason to force initdb before 10.0, we should go back and
clean this up.
Contrariwise, somebody added location fields to PartitionElem and
PartitionSpec but forgot to teach exprLocation() about them.
Consolidate duplicative code in transformPartitionBound().
Improve a couple of error messages.
Improve assorted commentary.
Re-pgindent the files touched by this patch; this affects a few comment
blocks that must have been added quite recently.
Report: https://postgr.es/m/20170524024550.29935.14396@wrigleys.postgresql.org
2017-05-29 05:20:28 +02:00
|
|
|
/*
 * _outPartitionSpec -
 *	  output a PartitionSpec node, the raw-parse-tree representation of a
 *	  PARTITION BY clause.
 */
static void
_outPartitionSpec(StringInfo str, const PartitionSpec *node)
{
	WRITE_NODE_TYPE("PARTITIONSPEC");

	/* partitioning strategy name, e.g. "list", "range", or "hash" */
	WRITE_STRING_FIELD(strategy);
	/* list of PartitionElem nodes naming the partition key columns/exprs */
	WRITE_NODE_FIELD(partParams);
	/* token location of the clause, or -1 if unknown */
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
/*
 * _outPartitionBoundSpec -
 *	  output a PartitionBoundSpec node (a partition's FOR VALUES bound, or
 *	  the DEFAULT marker).
 *
 * NOTE: the field order here must agree with the corresponding input
 * function in readfuncs.c; do not reorder these writes.
 */
static void
_outPartitionBoundSpec(StringInfo str, const PartitionBoundSpec *node)
{
	WRITE_NODE_TYPE("PARTITIONBOUNDSPEC");

	/* single-character strategy code (hash/list/range) */
	WRITE_CHAR_FIELD(strategy);
	/* true if this is the default partition's bound */
	WRITE_BOOL_FIELD(is_default);
	/* hash-partitioning parameters */
	WRITE_INT_FIELD(modulus);
	WRITE_INT_FIELD(remainder);
	/* list partitioning: the allowed datum values */
	WRITE_NODE_FIELD(listdatums);
	/* range partitioning: lower and upper bound datum lists */
	WRITE_NODE_FIELD(lowerdatums);
	WRITE_NODE_FIELD(upperdatums);
	/* token location, or -1 if unknown */
	WRITE_LOCATION_FIELD(location);
}
|
|
|
|
|
|
|
|
/*
 * _outPartitionRangeDatum -
 *	  output a PartitionRangeDatum node: one column's value within a range
 *	  partition bound (a specific value, or MINVALUE/MAXVALUE).
 *
 * NOTE: the field order here must agree with the corresponding input
 * function in readfuncs.c; do not reorder these writes.
 */
static void
_outPartitionRangeDatum(StringInfo str, const PartitionRangeDatum *node)
{
	WRITE_NODE_TYPE("PARTITIONRANGEDATUM");

	/* whether this is MINVALUE, MAXVALUE, or a specific value */
	WRITE_ENUM_FIELD(kind, PartitionRangeDatumKind);
	/* the bound value expression; NULL unless kind is a specific value */
	WRITE_NODE_FIELD(value);
	/* token location, or -1 if unknown */
	WRITE_LOCATION_FIELD(location);
}
|
2002-11-25 19:12:12 +01:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
2016-04-08 23:26:36 +02:00
|
|
|
* outNode -
|
1997-09-07 07:04:48 +02:00
|
|
|
* converts a Node into ascii string and append it to 'str'
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2016-04-08 23:26:36 +02:00
|
|
|
void
|
|
|
|
outNode(StringInfo str, const void *obj)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2018-12-10 17:12:43 +01:00
|
|
|
/* Guard against stack overflow due to overly complex expressions */
|
|
|
|
check_stack_depth();
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
if (obj == NULL)
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(str, "<>");
|
2004-08-29 07:07:03 +02:00
|
|
|
else if (IsA(obj, List) ||IsA(obj, IntList) || IsA(obj, OidList))
|
2004-05-26 06:41:50 +02:00
|
|
|
_outList(str, obj);
|
2002-11-25 19:12:12 +01:00
|
|
|
else if (IsA(obj, Integer) ||
|
|
|
|
IsA(obj, Float) ||
|
|
|
|
IsA(obj, String) ||
|
|
|
|
IsA(obj, BitString))
|
2000-02-15 21:49:31 +01:00
|
|
|
{
|
|
|
|
/* nodeRead does not want to see { } around these! */
|
|
|
|
_outValue(str, obj);
|
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
else
|
|
|
|
{
|
2000-01-14 01:53:21 +01:00
|
|
|
appendStringInfoChar(str, '{');
|
1997-09-07 07:04:48 +02:00
|
|
|
switch (nodeTag(obj))
|
|
|
|
{
|
2007-02-20 18:32:18 +01:00
|
|
|
case T_PlannedStmt:
|
|
|
|
_outPlannedStmt(str, obj);
|
|
|
|
break;
|
1997-09-08 04:41:22 +02:00
|
|
|
case T_Plan:
|
|
|
|
_outPlan(str, obj);
|
|
|
|
break;
|
|
|
|
case T_Result:
|
|
|
|
_outResult(str, obj);
|
|
|
|
break;
|
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs) in the targetlist (like SELECT
generate_series(1,5)) so far was done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of return values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would allow to retain the old
"least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided to instead only return rows till
all SRFs are exhausted, returning NULL for already exhausted ones. We
deemed the previous behavior to be too confusing, unexpected and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set returning
arguments to a function, is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed,
returning multiple rows from the expression, even when the SRF containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of, now superfluous, code dealing with set return expressions
around. But as the changes to get rid of those are verbose largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
2017-01-18 21:46:50 +01:00
|
|
|
case T_ProjectSet:
|
|
|
|
_outProjectSet(str, obj);
|
|
|
|
break;
|
2009-10-10 03:43:50 +02:00
|
|
|
case T_ModifyTable:
|
|
|
|
_outModifyTable(str, obj);
|
|
|
|
break;
|
1997-09-08 04:41:22 +02:00
|
|
|
case T_Append:
|
|
|
|
_outAppend(str, obj);
|
|
|
|
break;
|
2010-10-14 22:56:39 +02:00
|
|
|
case T_MergeAppend:
|
|
|
|
_outMergeAppend(str, obj);
|
|
|
|
break;
|
2008-10-04 23:56:55 +02:00
|
|
|
case T_RecursiveUnion:
|
|
|
|
_outRecursiveUnion(str, obj);
|
|
|
|
break;
|
2005-04-20 00:35:18 +02:00
|
|
|
case T_BitmapAnd:
|
|
|
|
_outBitmapAnd(str, obj);
|
|
|
|
break;
|
|
|
|
case T_BitmapOr:
|
|
|
|
_outBitmapOr(str, obj);
|
|
|
|
break;
|
Add a Gather executor node.
A Gather executor node runs any number of copies of a plan in an equal
number of workers and merges all of the results into a single tuple
stream. It can also run the plan itself, if the workers are
unavailable or haven't started up yet. It is intended to work with
the Partial Seq Scan node which will be added in future commits.
It could also be used to implement parallel query of a different sort
by itself, without help from Partial Seq Scan, if the single_copy mode
is used. In that mode, a worker executes the plan, and the parallel
leader does not, merely collecting the worker's results. So, a Gather
node could be inserted into a plan to split the execution of that plan
across two processes. Nested Gather nodes aren't currently supported,
but we might want to add support for that in the future.
There's nothing in the planner to actually generate Gather nodes yet,
so it's not quite time to break out the champagne. But we're getting
close.
Amit Kapila. Some designs suggestions were provided by me, and I also
reviewed the patch. Single-copy mode, documentation, and other minor
changes also by me.
2015-10-01 01:23:36 +02:00
|
|
|
case T_Gather:
|
|
|
|
_outGather(str, obj);
|
|
|
|
break;
|
2017-03-09 13:40:36 +01:00
|
|
|
case T_GatherMerge:
|
|
|
|
_outGatherMerge(str, obj);
|
|
|
|
break;
|
1997-09-08 04:41:22 +02:00
|
|
|
case T_Scan:
|
|
|
|
_outScan(str, obj);
|
|
|
|
break;
|
|
|
|
case T_SeqScan:
|
|
|
|
_outSeqScan(str, obj);
|
|
|
|
break;
|
Redesign tablesample method API, and do extensive code review.
The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a TSM. (Raw inserts into system catalogs are not an acceptable
thing to do, because we can't undo them during DROP EXTENSION, nor will
pg_upgrade behave sanely.) Instead adopt an API more like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.
Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which is undesirable even if it weren't outright
unsafe), and discourage sampling methods from looking at invisible tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.
Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.
Back-patch to 9.5 so that we don't have to support the original API
in production.
2015-07-25 20:39:00 +02:00
|
|
|
case T_SampleScan:
|
|
|
|
_outSampleScan(str, obj);
|
|
|
|
break;
|
1997-09-08 04:41:22 +02:00
|
|
|
case T_IndexScan:
|
|
|
|
_outIndexScan(str, obj);
|
|
|
|
break;
|
2011-10-11 20:20:06 +02:00
|
|
|
case T_IndexOnlyScan:
|
|
|
|
_outIndexOnlyScan(str, obj);
|
|
|
|
break;
|
2005-04-20 00:35:18 +02:00
|
|
|
case T_BitmapIndexScan:
|
|
|
|
_outBitmapIndexScan(str, obj);
|
|
|
|
break;
|
|
|
|
case T_BitmapHeapScan:
|
|
|
|
_outBitmapHeapScan(str, obj);
|
|
|
|
break;
|
1999-11-23 21:07:06 +01:00
|
|
|
case T_TidScan:
|
|
|
|
_outTidScan(str, obj);
|
|
|
|
break;
|
2000-09-29 20:21:41 +02:00
|
|
|
case T_SubqueryScan:
|
|
|
|
_outSubqueryScan(str, obj);
|
|
|
|
break;
|
2002-05-12 22:10:05 +02:00
|
|
|
case T_FunctionScan:
|
|
|
|
_outFunctionScan(str, obj);
|
|
|
|
break;
|
2017-03-08 16:39:37 +01:00
|
|
|
case T_TableFuncScan:
|
|
|
|
_outTableFuncScan(str, obj);
|
|
|
|
break;
|
2006-08-02 03:59:48 +02:00
|
|
|
case T_ValuesScan:
|
|
|
|
_outValuesScan(str, obj);
|
|
|
|
break;
|
2008-10-04 23:56:55 +02:00
|
|
|
case T_CteScan:
|
|
|
|
_outCteScan(str, obj);
|
|
|
|
break;
|
2017-04-01 06:17:18 +02:00
|
|
|
case T_NamedTuplestoreScan:
|
|
|
|
_outNamedTuplestoreScan(str, obj);
|
|
|
|
break;
|
2008-10-04 23:56:55 +02:00
|
|
|
case T_WorkTableScan:
|
|
|
|
_outWorkTableScan(str, obj);
|
|
|
|
break;
|
2011-02-20 06:17:18 +01:00
|
|
|
case T_ForeignScan:
|
|
|
|
_outForeignScan(str, obj);
|
|
|
|
break;
|
2014-11-07 23:26:02 +01:00
|
|
|
case T_CustomScan:
|
|
|
|
_outCustomScan(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_Join:
|
|
|
|
_outJoin(str, obj);
|
2000-03-24 03:58:25 +01:00
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_NestLoop:
|
|
|
|
_outNestLoop(str, obj);
|
|
|
|
break;
|
|
|
|
case T_MergeJoin:
|
|
|
|
_outMergeJoin(str, obj);
|
|
|
|
break;
|
|
|
|
case T_HashJoin:
|
|
|
|
_outHashJoin(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
|
|
|
case T_Agg:
|
|
|
|
_outAgg(str, obj);
|
|
|
|
break;
|
2008-12-28 19:54:01 +01:00
|
|
|
case T_WindowAgg:
|
|
|
|
_outWindowAgg(str, obj);
|
|
|
|
break;
|
1997-09-08 04:41:22 +02:00
|
|
|
case T_Group:
|
|
|
|
_outGroup(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_Material:
|
|
|
|
_outMaterial(str, obj);
|
|
|
|
break;
|
|
|
|
case T_Sort:
|
|
|
|
_outSort(str, obj);
|
|
|
|
break;
|
1997-09-08 04:41:22 +02:00
|
|
|
case T_Unique:
|
|
|
|
_outUnique(str, obj);
|
|
|
|
break;
|
2008-09-09 20:58:09 +02:00
|
|
|
case T_Hash:
|
|
|
|
_outHash(str, obj);
|
|
|
|
break;
|
2000-10-05 21:11:39 +02:00
|
|
|
case T_SetOp:
|
|
|
|
_outSetOp(str, obj);
|
|
|
|
break;
|
2009-10-12 20:10:51 +02:00
|
|
|
case T_LockRows:
|
|
|
|
_outLockRows(str, obj);
|
|
|
|
break;
|
2000-10-26 23:38:24 +02:00
|
|
|
case T_Limit:
|
|
|
|
_outLimit(str, obj);
|
|
|
|
break;
|
2010-07-12 19:01:06 +02:00
|
|
|
case T_NestLoopParam:
|
|
|
|
_outNestLoopParam(str, obj);
|
|
|
|
break;
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
case T_PlanRowMark:
|
|
|
|
_outPlanRowMark(str, obj);
|
|
|
|
break;
|
2018-06-10 22:30:14 +02:00
|
|
|
case T_PartitionPruneInfo:
|
|
|
|
_outPartitionPruneInfo(str, obj);
|
|
|
|
break;
|
2018-08-02 01:42:46 +02:00
|
|
|
case T_PartitionedRelPruneInfo:
|
|
|
|
_outPartitionedRelPruneInfo(str, obj);
|
|
|
|
break;
|
2018-06-10 22:30:14 +02:00
|
|
|
case T_PartitionPruneStepOp:
|
|
|
|
_outPartitionPruneStepOp(str, obj);
|
|
|
|
break;
|
|
|
|
case T_PartitionPruneStepCombine:
|
|
|
|
_outPartitionPruneStepCombine(str, obj);
|
|
|
|
break;
|
2008-09-09 20:58:09 +02:00
|
|
|
case T_PlanInvalItem:
|
|
|
|
_outPlanInvalItem(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_Alias:
|
|
|
|
_outAlias(str, obj);
|
|
|
|
break;
|
|
|
|
case T_RangeVar:
|
|
|
|
_outRangeVar(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
2017-03-08 16:39:37 +01:00
|
|
|
case T_TableFunc:
|
|
|
|
_outTableFunc(str, obj);
|
|
|
|
break;
|
2007-02-20 18:32:18 +01:00
|
|
|
case T_IntoClause:
|
|
|
|
_outIntoClause(str, obj);
|
|
|
|
break;
|
1997-09-08 04:41:22 +02:00
|
|
|
case T_Var:
|
|
|
|
_outVar(str, obj);
|
|
|
|
break;
|
|
|
|
case T_Const:
|
|
|
|
_outConst(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_Param:
|
|
|
|
_outParam(str, obj);
|
|
|
|
break;
|
1999-01-24 01:28:37 +01:00
|
|
|
case T_Aggref:
|
|
|
|
_outAggref(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in an earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
case T_GroupingFunc:
|
|
|
|
_outGroupingFunc(str, obj);
|
|
|
|
break;
|
2008-12-28 19:54:01 +01:00
|
|
|
case T_WindowFunc:
|
|
|
|
_outWindowFunc(str, obj);
|
|
|
|
break;
|
2019-02-01 16:50:32 +01:00
|
|
|
case T_SubscriptingRef:
|
|
|
|
_outSubscriptingRef(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_FuncExpr:
|
|
|
|
_outFuncExpr(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
2009-10-08 04:39:25 +02:00
|
|
|
case T_NamedArgExpr:
|
|
|
|
_outNamedArgExpr(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_OpExpr:
|
|
|
|
_outOpExpr(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_DistinctExpr:
|
|
|
|
_outDistinctExpr(str, obj);
|
|
|
|
break;
|
2011-03-20 01:29:08 +01:00
|
|
|
case T_NullIfExpr:
|
|
|
|
_outNullIfExpr(str, obj);
|
|
|
|
break;
|
2003-06-29 02:33:44 +02:00
|
|
|
case T_ScalarArrayOpExpr:
|
|
|
|
_outScalarArrayOpExpr(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_BoolExpr:
|
|
|
|
_outBoolExpr(str, obj);
|
|
|
|
break;
|
|
|
|
case T_SubLink:
|
|
|
|
_outSubLink(str, obj);
|
|
|
|
break;
|
2002-12-14 01:17:59 +01:00
|
|
|
case T_SubPlan:
|
|
|
|
_outSubPlan(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
2008-08-22 02:16:04 +02:00
|
|
|
case T_AlternativeSubPlan:
|
|
|
|
_outAlternativeSubPlan(str, obj);
|
|
|
|
break;
|
2000-09-12 23:07:18 +02:00
|
|
|
case T_FieldSelect:
|
|
|
|
_outFieldSelect(str, obj);
|
|
|
|
break;
|
2004-06-09 21:08:20 +02:00
|
|
|
case T_FieldStore:
|
|
|
|
_outFieldStore(str, obj);
|
|
|
|
break;
|
2000-09-12 23:07:18 +02:00
|
|
|
case T_RelabelType:
|
|
|
|
_outRelabelType(str, obj);
|
|
|
|
break;
|
2007-06-05 23:31:09 +02:00
|
|
|
case T_CoerceViaIO:
|
|
|
|
_outCoerceViaIO(str, obj);
|
|
|
|
break;
|
2007-03-28 01:21:12 +02:00
|
|
|
case T_ArrayCoerceExpr:
|
|
|
|
_outArrayCoerceExpr(str, obj);
|
|
|
|
break;
|
2004-12-12 00:26:51 +01:00
|
|
|
case T_ConvertRowtypeExpr:
|
|
|
|
_outConvertRowtypeExpr(str, obj);
|
|
|
|
break;
|
2011-03-11 22:27:51 +01:00
|
|
|
case T_CollateExpr:
|
|
|
|
_outCollateExpr(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_CaseExpr:
|
|
|
|
_outCaseExpr(str, obj);
|
2000-09-12 23:07:18 +02:00
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_CaseWhen:
|
|
|
|
_outCaseWhen(str, obj);
|
2000-09-29 20:21:41 +02:00
|
|
|
break;
|
2004-03-17 21:48:43 +01:00
|
|
|
case T_CaseTestExpr:
|
|
|
|
_outCaseTestExpr(str, obj);
|
|
|
|
break;
|
2003-04-09 01:20:04 +02:00
|
|
|
case T_ArrayExpr:
|
|
|
|
_outArrayExpr(str, obj);
|
|
|
|
break;
|
2004-05-11 00:44:49 +02:00
|
|
|
case T_RowExpr:
|
|
|
|
_outRowExpr(str, obj);
|
|
|
|
break;
|
2005-12-28 02:30:02 +01:00
|
|
|
case T_RowCompareExpr:
|
|
|
|
_outRowCompareExpr(str, obj);
|
|
|
|
break;
|
2003-02-16 03:30:39 +01:00
|
|
|
case T_CoalesceExpr:
|
|
|
|
_outCoalesceExpr(str, obj);
|
|
|
|
break;
|
2005-06-27 00:05:42 +02:00
|
|
|
case T_MinMaxExpr:
|
|
|
|
_outMinMaxExpr(str, obj);
|
|
|
|
break;
|
2016-08-17 02:33:01 +02:00
|
|
|
case T_SQLValueFunction:
|
|
|
|
_outSQLValueFunction(str, obj);
|
|
|
|
break;
|
2006-12-24 01:29:20 +01:00
|
|
|
case T_XmlExpr:
|
|
|
|
_outXmlExpr(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_NullTest:
|
|
|
|
_outNullTest(str, obj);
|
|
|
|
break;
|
|
|
|
case T_BooleanTest:
|
|
|
|
_outBooleanTest(str, obj);
|
|
|
|
break;
|
2003-02-03 22:15:45 +01:00
|
|
|
case T_CoerceToDomain:
|
|
|
|
_outCoerceToDomain(str, obj);
|
2002-12-12 16:49:42 +01:00
|
|
|
break;
|
2003-02-03 22:15:45 +01:00
|
|
|
case T_CoerceToDomainValue:
|
|
|
|
_outCoerceToDomainValue(str, obj);
|
2000-09-12 23:07:18 +02:00
|
|
|
break;
|
2003-07-03 18:34:26 +02:00
|
|
|
case T_SetToDefault:
|
|
|
|
_outSetToDefault(str, obj);
|
|
|
|
break;
|
2007-06-11 03:16:30 +02:00
|
|
|
case T_CurrentOfExpr:
|
|
|
|
_outCurrentOfExpr(str, obj);
|
|
|
|
break;
|
Code review for NextValueExpr expression node type.
Add missing infrastructure for this node type, notably in ruleutils.c where
its lack could demonstrably cause EXPLAIN to fail. Add outfuncs/readfuncs
support. (outfuncs support is useful today for debugging purposes. The
readfuncs support may never be needed, since at present it would only
matter for parallel query and NextValueExpr should never appear in a
parallelizable query; but it seems like a bad idea to have a primnode type
that isn't fully supported here.) Teach planner infrastructure that
NextValueExpr is a volatile, parallel-unsafe, non-leaky expression node
with cost cpu_operator_cost. Given its limited scope of usage, there
*might* be no live bug today from the lack of that knowledge, but it's
certainly going to bite us on the rear someday. Teach pg_stat_statements
about the new node type, too.
While at it, also teach cost_qual_eval() that MinMaxExpr, SQLValueFunction,
XmlExpr, and CoerceToDomain should be charged as cpu_operator_cost.
Failing to do this for SQLValueFunction was an oversight in my commit
0bb51aa96. The others are longer-standing oversights, but no time like the
present to fix them. (In principle, CoerceToDomain could have cost much
higher than this, but it doesn't presently seem worth trying to examine the
domain's constraints here.)
Modify execExprInterp.c to execute NextValueExpr as an out-of-line
function; it seems quite unlikely to me that it's worth insisting that
it be inlined in all expression eval methods. Besides, providing the
out-of-line function doesn't stop anyone from inlining if they want to.
Adjust some places where NextValueExpr support had been inserted with the
aid of a dartboard rather than keeping it in the same order as elsewhere.
Discussion: https://postgr.es/m/23862.1499981661@sss.pgh.pa.us
2017-07-14 21:25:43 +02:00
|
|
|
case T_NextValueExpr:
|
|
|
|
_outNextValueExpr(str, obj);
|
|
|
|
break;
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
case T_InferenceElem:
|
|
|
|
_outInferenceElem(str, obj);
|
|
|
|
break;
|
1997-09-08 04:41:22 +02:00
|
|
|
case T_TargetEntry:
|
|
|
|
_outTargetEntry(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_RangeTblRef:
|
|
|
|
_outRangeTblRef(str, obj);
|
2002-03-21 17:02:16 +01:00
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_JoinExpr:
|
|
|
|
_outJoinExpr(str, obj);
|
|
|
|
break;
|
|
|
|
case T_FromExpr:
|
|
|
|
_outFromExpr(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
2015-05-08 05:31:36 +02:00
|
|
|
case T_OnConflictExpr:
|
|
|
|
_outOnConflictExpr(str, obj);
|
|
|
|
break;
|
1997-09-08 04:41:22 +02:00
|
|
|
case T_Path:
|
|
|
|
_outPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_IndexPath:
|
|
|
|
_outIndexPath(str, obj);
|
|
|
|
break;
|
2005-04-20 00:35:18 +02:00
|
|
|
case T_BitmapHeapPath:
|
|
|
|
_outBitmapHeapPath(str, obj);
|
|
|
|
break;
|
2005-04-21 21:18:13 +02:00
|
|
|
case T_BitmapAndPath:
|
|
|
|
_outBitmapAndPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_BitmapOrPath:
|
|
|
|
_outBitmapOrPath(str, obj);
|
|
|
|
break;
|
1999-11-23 21:07:06 +01:00
|
|
|
case T_TidPath:
|
|
|
|
_outTidPath(str, obj);
|
|
|
|
break;
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
case T_SubqueryScanPath:
|
|
|
|
_outSubqueryScanPath(str, obj);
|
|
|
|
break;
|
2011-02-20 06:17:18 +01:00
|
|
|
case T_ForeignPath:
|
|
|
|
_outForeignPath(str, obj);
|
|
|
|
break;
|
2014-11-07 23:26:02 +01:00
|
|
|
case T_CustomPath:
|
|
|
|
_outCustomPath(str, obj);
|
|
|
|
break;
|
2000-11-12 01:37:02 +01:00
|
|
|
case T_AppendPath:
|
|
|
|
_outAppendPath(str, obj);
|
|
|
|
break;
|
2010-10-14 22:56:39 +02:00
|
|
|
case T_MergeAppendPath:
|
|
|
|
_outMergeAppendPath(str, obj);
|
|
|
|
break;
|
In the planner, replace an empty FROM clause with a dummy RTE.
The fact that "SELECT expression" has no base relations has long been a
thorn in the side of the planner. It makes it hard to flatten a sub-query
that looks like that, or is a trivial VALUES() item, because the planner
generally uses relid sets to identify sub-relations, and such a sub-query
would have an empty relid set if we flattened it. prepjointree.c contains
some baroque logic that works around this in certain special cases --- but
there is a much better answer. We can replace an empty FROM clause with a
dummy RTE that acts like a table of one row and no columns, and then there
are no such corner cases to worry about. Instead we need some logic to
get rid of useless dummy RTEs, but that's simpler and covers more cases
than what was there before.
For really trivial cases, where the query is just "SELECT expression" and
nothing else, there's a hazard that adding the extra RTE makes for a
noticeable slowdown; even though it's not much processing, there's not
that much for the planner to do overall. However testing says that the
penalty is very small, close to the noise level. In more complex queries,
this is able to find optimizations that we could not find before.
The new RTE type is called RTE_RESULT, since the "scan" plan type it
gives rise to is a Result node (the same plan we produced for a "SELECT
expression" query before). To avoid confusion, rename the old ResultPath
path type to GroupResultPath, reflecting that it's only used in degenerate
grouping cases where we know the query produces just one grouped row.
(It wouldn't work to unify the two cases, because there are different
rules about where the associated quals live during query_planner.)
Note: although this touches readfuncs.c, I don't think a catversion
bump is required, because the added case can't occur in stored rules,
only plans.
Patch by me, reviewed by David Rowley and Mark Dilger
Discussion: https://postgr.es/m/15944.1521127664@sss.pgh.pa.us
2019-01-28 23:54:10 +01:00
|
|
|
case T_GroupResultPath:
|
|
|
|
_outGroupResultPath(str, obj);
|
2002-11-06 01:00:45 +01:00
|
|
|
break;
|
2002-11-30 06:21:03 +01:00
|
|
|
case T_MaterialPath:
|
|
|
|
_outMaterialPath(str, obj);
|
|
|
|
break;
|
2003-01-20 19:55:07 +01:00
|
|
|
case T_UniquePath:
|
|
|
|
_outUniquePath(str, obj);
|
|
|
|
break;
|
2015-11-11 12:29:03 +01:00
|
|
|
case T_GatherPath:
|
|
|
|
_outGatherPath(str, obj);
|
|
|
|
break;
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
case T_ProjectionPath:
|
|
|
|
_outProjectionPath(str, obj);
|
|
|
|
break;
|
Move targetlist SRF handling from expression evaluation to new executor node.
Evaluation of set returning functions (SRFs_ in the targetlist (like SELECT
generate_series(1,5)) so far was done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of return values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would allow to retain the old
"least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided to instead only return rows till
all SRFs are exhausted, returning NULL for already exhausted ones. We
deemed the previous behavior to be too confusing, unexpected and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set returning
arguments to a function, is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed,
returning multiple rows from the expression, even when the SRF containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of, now superfluous, code dealing with set return expressions
around. But as the changes to get rid of those are verbose largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
2017-01-18 21:46:50 +01:00
|
|
|
case T_ProjectSetPath:
|
|
|
|
_outProjectSetPath(str, obj);
|
|
|
|
break;
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 21:58:22 +01:00
|
|
|
case T_SortPath:
|
|
|
|
_outSortPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_GroupPath:
|
|
|
|
_outGroupPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_UpperUniquePath:
|
|
|
|
_outUpperUniquePath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_AggPath:
|
|
|
|
_outAggPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_GroupingSetsPath:
|
|
|
|
_outGroupingSetsPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_MinMaxAggPath:
|
|
|
|
_outMinMaxAggPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_WindowAggPath:
|
|
|
|
_outWindowAggPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_SetOpPath:
|
|
|
|
_outSetOpPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_RecursiveUnionPath:
|
|
|
|
_outRecursiveUnionPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_LockRowsPath:
|
|
|
|
_outLockRowsPath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_ModifyTablePath:
|
|
|
|
_outModifyTablePath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_LimitPath:
|
|
|
|
_outLimitPath(str, obj);
|
|
|
|
break;
|
2017-03-09 13:40:36 +01:00
|
|
|
case T_GatherMergePath:
|
|
|
|
_outGatherMergePath(str, obj);
|
|
|
|
break;
|
1999-02-12 07:43:53 +01:00
|
|
|
case T_NestPath:
|
|
|
|
_outNestPath(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
|
|
|
case T_MergePath:
|
|
|
|
_outMergePath(str, obj);
|
|
|
|
break;
|
|
|
|
case T_HashPath:
|
|
|
|
_outHashPath(str, obj);
|
|
|
|
break;
|
2007-02-19 08:03:34 +01:00
|
|
|
case T_PlannerGlobal:
|
|
|
|
_outPlannerGlobal(str, obj);
|
|
|
|
break;
|
2005-06-06 00:32:58 +02:00
|
|
|
case T_PlannerInfo:
|
|
|
|
_outPlannerInfo(str, obj);
|
|
|
|
break;
|
|
|
|
case T_RelOptInfo:
|
|
|
|
_outRelOptInfo(str, obj);
|
|
|
|
break;
|
|
|
|
case T_IndexOptInfo:
|
|
|
|
_outIndexOptInfo(str, obj);
|
|
|
|
break;
|
2016-06-18 21:22:34 +02:00
|
|
|
case T_ForeignKeyOptInfo:
|
|
|
|
_outForeignKeyOptInfo(str, obj);
|
|
|
|
break;
|
2007-01-20 21:45:41 +01:00
|
|
|
case T_EquivalenceClass:
|
|
|
|
_outEquivalenceClass(str, obj);
|
|
|
|
break;
|
|
|
|
case T_EquivalenceMember:
|
|
|
|
_outEquivalenceMember(str, obj);
|
|
|
|
break;
|
|
|
|
case T_PathKey:
|
|
|
|
_outPathKey(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
2016-03-14 21:59:59 +01:00
|
|
|
case T_PathTarget:
|
|
|
|
_outPathTarget(str, obj);
|
|
|
|
break;
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
case T_ParamPathInfo:
|
|
|
|
_outParamPathInfo(str, obj);
|
|
|
|
break;
|
1999-02-03 21:15:53 +01:00
|
|
|
case T_RestrictInfo:
|
|
|
|
_outRestrictInfo(str, obj);
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
Refactor the representation of indexable clauses in IndexPaths.
In place of three separate but interrelated lists (indexclauses,
indexquals, and indexqualcols), an IndexPath now has one list
"indexclauses" of IndexClause nodes. This holds basically the same
information as before, but in a more useful format: in particular, there
is now a clear connection between an indexclause (an original restriction
clause from WHERE or JOIN/ON) and the indexquals (directly usable index
conditions) derived from it.
We also change the ground rules a bit by mandating that clause commutation,
if needed, be done up-front so that what is stored in the indexquals list
is always directly usable as an index condition. This gets rid of repeated
re-determination of which side of the clause is the indexkey during costing
and plan generation, as well as repeated lookups of the commutator
operator. To minimize the added up-front cost, the typical case of
commuting a plain OpExpr is handled by a new special-purpose function
commute_restrictinfo(). For RowCompareExprs, generating the new clause
properly commuted to begin with is not really any more complex than before,
it's just different --- and we can save doing that work twice, as the
pretty-klugy original implementation did.
Tracking the connection between original and derived clauses lets us
also track explicitly whether the derived clauses are an exact or lossy
translation of the original. This provides a cheap solution to getting
rid of unnecessary rechecks of boolean index clauses, which previously
seemed like it'd be more expensive than it was worth.
Another pleasant (IMO) side-effect is that EXPLAIN now always shows
index clauses with the indexkey on the left; this seems less confusing.
This commit leaves expand_indexqual_conditions() and some related
functions in a slightly messy state. I didn't bother to change them
any more than minimally necessary to work with the new data structure,
because all that code is going to be refactored out of existence in
a follow-on patch.
Discussion: https://postgr.es/m/22182.1549124950@sss.pgh.pa.us
2019-02-09 23:30:43 +01:00
|
|
|
case T_IndexClause:
|
|
|
|
_outIndexClause(str, obj);
|
|
|
|
break;
|
2008-10-21 22:42:53 +02:00
|
|
|
case T_PlaceHolderVar:
|
|
|
|
_outPlaceHolderVar(str, obj);
|
|
|
|
break;
|
2008-08-14 20:48:00 +02:00
|
|
|
case T_SpecialJoinInfo:
|
|
|
|
_outSpecialJoinInfo(str, obj);
|
2003-01-20 19:55:07 +01:00
|
|
|
break;
|
2006-01-31 22:39:25 +01:00
|
|
|
case T_AppendRelInfo:
|
|
|
|
_outAppendRelInfo(str, obj);
|
|
|
|
break;
|
2008-10-21 22:42:53 +02:00
|
|
|
case T_PlaceHolderInfo:
|
|
|
|
_outPlaceHolderInfo(str, obj);
|
|
|
|
break;
|
2010-11-04 17:01:17 +01:00
|
|
|
case T_MinMaxAggInfo:
|
|
|
|
_outMinMaxAggInfo(str, obj);
|
|
|
|
break;
|
2007-02-19 08:03:34 +01:00
|
|
|
case T_PlannerParamItem:
|
|
|
|
_outPlannerParamItem(str, obj);
|
|
|
|
break;
|
2017-03-27 05:20:54 +02:00
|
|
|
case T_RollupData:
|
|
|
|
_outRollupData(str, obj);
|
|
|
|
break;
|
|
|
|
case T_GroupingSetData:
|
|
|
|
_outGroupingSetData(str, obj);
|
|
|
|
break;
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 18:06:10 +01:00
|
|
|
case T_StatisticExtInfo:
|
|
|
|
_outStatisticExtInfo(str, obj);
|
|
|
|
break;
|
Introduce extensible node types.
An extensible node is always tagged T_Extensible, but the extnodename
field identifies it more specifically; it may also include arbitrary
private data. Extensible nodes can be copied, tested for equality,
serialized, and deserialized, but the core system doesn't know
anything about them otherwise. Some extensions may find it useful to
include these nodes in fdw_private or custom_private lists in lieu of
arm-wrestling their data into a format that the core code can
understand.
Along the way, so as not to burden the authors of such extensible
node types too much, expose the functions for writing serialized
tokens, and for serializing and deserializing bitmapsets.
KaiGai Kohei, per a design suggested by me. Reviewed by Andres Freund
and by me, and further edited by me.
2016-02-12 15:31:16 +01:00
|
|
|
case T_ExtensibleNode:
|
|
|
|
_outExtensibleNode(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_CreateStmt:
|
|
|
|
_outCreateStmt(str, obj);
|
|
|
|
break;
|
2011-01-02 05:48:11 +01:00
|
|
|
case T_CreateForeignTableStmt:
|
|
|
|
_outCreateForeignTableStmt(str, obj);
|
|
|
|
break;
|
2014-07-10 21:01:31 +02:00
|
|
|
case T_ImportForeignSchemaStmt:
|
|
|
|
_outImportForeignSchemaStmt(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_IndexStmt:
|
|
|
|
_outIndexStmt(str, obj);
|
|
|
|
break;
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 18:06:10 +01:00
|
|
|
case T_CreateStatsStmt:
|
|
|
|
_outCreateStatsStmt(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_NotifyStmt:
|
|
|
|
_outNotifyStmt(str, obj);
|
|
|
|
break;
|
2003-03-10 04:53:52 +01:00
|
|
|
case T_DeclareCursorStmt:
|
|
|
|
_outDeclareCursorStmt(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_SelectStmt:
|
|
|
|
_outSelectStmt(str, obj);
|
|
|
|
break;
|
|
|
|
case T_ColumnDef:
|
|
|
|
_outColumnDef(str, obj);
|
|
|
|
break;
|
|
|
|
case T_TypeName:
|
|
|
|
_outTypeName(str, obj);
|
|
|
|
break;
|
|
|
|
case T_TypeCast:
|
|
|
|
_outTypeCast(str, obj);
|
|
|
|
break;
|
2011-03-11 22:27:51 +01:00
|
|
|
case T_CollateClause:
|
|
|
|
_outCollateClause(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_IndexElem:
|
|
|
|
_outIndexElem(str, obj);
|
|
|
|
break;
|
|
|
|
case T_Query:
|
|
|
|
_outQuery(str, obj);
|
|
|
|
break;
|
2013-07-18 23:10:16 +02:00
|
|
|
case T_WithCheckOption:
|
|
|
|
_outWithCheckOption(str, obj);
|
|
|
|
break;
|
2008-08-02 23:32:01 +02:00
|
|
|
case T_SortGroupClause:
|
|
|
|
_outSortGroupClause(str, obj);
|
2002-12-12 16:49:42 +01:00
|
|
|
break;
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in an earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
case T_GroupingSet:
|
|
|
|
_outGroupingSet(str, obj);
|
|
|
|
break;
|
2008-12-28 19:54:01 +01:00
|
|
|
case T_WindowClause:
|
|
|
|
_outWindowClause(str, obj);
|
|
|
|
break;
|
2006-04-30 20:30:40 +02:00
|
|
|
case T_RowMarkClause:
|
|
|
|
_outRowMarkClause(str, obj);
|
|
|
|
break;
|
2008-10-04 23:56:55 +02:00
|
|
|
case T_WithClause:
|
|
|
|
_outWithClause(str, obj);
|
|
|
|
break;
|
|
|
|
case T_CommonTableExpr:
|
|
|
|
_outCommonTableExpr(str, obj);
|
|
|
|
break;
|
2002-12-12 16:49:42 +01:00
|
|
|
case T_SetOperationStmt:
|
|
|
|
_outSetOperationStmt(str, obj);
|
|
|
|
break;
|
|
|
|
case T_RangeTblEntry:
|
|
|
|
_outRangeTblEntry(str, obj);
|
|
|
|
break;
|
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
2013-11-22 01:37:02 +01:00
|
|
|
case T_RangeTblFunction:
|
|
|
|
_outRangeTblFunction(str, obj);
|
|
|
|
break;
|
Redesign tablesample method API, and do extensive code review.
The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a TSM. (Raw inserts into system catalogs are not an acceptable
thing to do, because we can't undo them during DROP EXTENSION, nor will
pg_upgrade behave sanely.) Instead adopt an API more like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.
Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which is undesirable even if it weren't outright
unsafe), and discourage sampling methods from looking at invisible tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.
Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.
Back-patch to 9.5 so that we don't have to support the original API
in production.
2015-07-25 20:39:00 +02:00
|
|
|
case T_TableSampleClause:
|
|
|
|
_outTableSampleClause(str, obj);
|
|
|
|
break;
|
1997-12-23 20:50:54 +01:00
|
|
|
case T_A_Expr:
|
|
|
|
_outAExpr(str, obj);
|
|
|
|
break;
|
2002-03-21 17:02:16 +01:00
|
|
|
case T_ColumnRef:
|
|
|
|
_outColumnRef(str, obj);
|
|
|
|
break;
|
|
|
|
case T_ParamRef:
|
|
|
|
_outParamRef(str, obj);
|
|
|
|
break;
|
2018-09-16 19:02:47 +02:00
|
|
|
case T_RawStmt:
|
|
|
|
_outRawStmt(str, obj);
|
|
|
|
break;
|
1997-12-23 20:50:54 +01:00
|
|
|
case T_A_Const:
|
|
|
|
_outAConst(str, obj);
|
|
|
|
break;
|
2008-08-30 03:39:14 +02:00
|
|
|
case T_A_Star:
|
|
|
|
_outA_Star(str, obj);
|
|
|
|
break;
|
2004-06-09 21:08:20 +02:00
|
|
|
case T_A_Indices:
|
|
|
|
_outA_Indices(str, obj);
|
|
|
|
break;
|
|
|
|
case T_A_Indirection:
|
|
|
|
_outA_Indirection(str, obj);
|
|
|
|
break;
|
2008-03-20 22:42:48 +01:00
|
|
|
case T_A_ArrayExpr:
|
|
|
|
_outA_ArrayExpr(str, obj);
|
|
|
|
break;
|
2004-06-09 21:08:20 +02:00
|
|
|
case T_ResTarget:
|
|
|
|
_outResTarget(str, obj);
|
2002-03-21 17:02:16 +01:00
|
|
|
break;
|
Implement UPDATE tab SET (col1,col2,...) = (SELECT ...), ...
This SQL-standard feature allows a sub-SELECT yielding multiple columns
(but only one row) to be used to compute the new values of several columns
to be updated. While the same results can be had with an independent
sub-SELECT per column, such a workaround can require a great deal of
duplicated computation.
The standard actually says that the source for a multi-column assignment
could be any row-valued expression. The implementation used here is
tightly tied to our existing sub-SELECT support and can't handle other
cases; the Bison grammar would have some issues with them too. However,
I don't feel too bad about this since other cases can be converted into
sub-SELECTs. For instance, "SET (a,b,c) = row_valued_function(x)" could
be written "SET (a,b,c) = (SELECT * FROM row_valued_function(x))".
2014-06-18 19:22:25 +02:00
|
|
|
case T_MultiAssignRef:
|
|
|
|
_outMultiAssignRef(str, obj);
|
|
|
|
break;
|
2008-07-17 18:02:12 +02:00
|
|
|
case T_SortBy:
|
|
|
|
_outSortBy(str, obj);
|
|
|
|
break;
|
2008-12-28 19:54:01 +01:00
|
|
|
case T_WindowDef:
|
|
|
|
_outWindowDef(str, obj);
|
|
|
|
break;
|
2008-10-04 23:56:55 +02:00
|
|
|
case T_RangeSubselect:
|
|
|
|
_outRangeSubselect(str, obj);
|
|
|
|
break;
|
|
|
|
case T_RangeFunction:
|
|
|
|
_outRangeFunction(str, obj);
|
|
|
|
break;
|
Redesign tablesample method API, and do extensive code review.
The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a TSM. (Raw inserts into system catalogs are not an acceptable
thing to do, because we can't undo them during DROP EXTENSION, nor will
pg_upgrade behave sanely.) Instead adopt an API more like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.
Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which is undesirable even if it weren't outright
unsafe), and discourage sampling methods from looking at invisible tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.
Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.
Back-patch to 9.5 so that we don't have to support the original API
in production.
2015-07-25 20:39:00 +02:00
|
|
|
case T_RangeTableSample:
|
|
|
|
_outRangeTableSample(str, obj);
|
|
|
|
break;
|
2017-03-08 16:39:37 +01:00
|
|
|
case T_RangeTableFunc:
|
|
|
|
_outRangeTableFunc(str, obj);
|
|
|
|
break;
|
|
|
|
case T_RangeTableFuncCol:
|
|
|
|
_outRangeTableFuncCol(str, obj);
|
|
|
|
break;
|
1998-12-04 16:34:49 +01:00
|
|
|
case T_Constraint:
|
|
|
|
_outConstraint(str, obj);
|
|
|
|
break;
|
1999-02-23 09:01:47 +01:00
|
|
|
case T_FuncCall:
|
|
|
|
_outFuncCall(str, obj);
|
|
|
|
break;
|
2004-06-09 21:08:20 +02:00
|
|
|
case T_DefElem:
|
|
|
|
_outDefElem(str, obj);
|
|
|
|
break;
|
2012-01-07 13:58:13 +01:00
|
|
|
case T_TableLikeClause:
|
|
|
|
_outTableLikeClause(str, obj);
|
2010-11-13 06:34:45 +01:00
|
|
|
break;
|
2005-08-01 22:31:16 +02:00
|
|
|
case T_LockingClause:
|
|
|
|
_outLockingClause(str, obj);
|
|
|
|
break;
|
2007-02-03 15:06:56 +01:00
|
|
|
case T_XmlSerialize:
|
|
|
|
_outXmlSerialize(str, obj);
|
|
|
|
break;
|
2016-06-18 21:22:34 +02:00
|
|
|
case T_ForeignKeyCacheInfo:
|
|
|
|
_outForeignKeyCacheInfo(str, obj);
|
|
|
|
break;
|
2016-11-04 16:49:50 +01:00
|
|
|
case T_TriggerTransition:
|
|
|
|
_outTriggerTransition(str, obj);
|
|
|
|
break;
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
case T_PartitionElem:
|
|
|
|
_outPartitionElem(str, obj);
|
|
|
|
break;
|
Code review focused on new node types added by partitioning support.
Fix failure to check that we got a plain Const from const-simplification of
a coercion request. This is the cause of bug #14666 from Tian Bing: there
is an int4 to money cast, but it's only stable not immutable (because of
dependence on lc_monetary), resulting in a FuncExpr that the code was
miserably unequipped to deal with, or indeed even to notice that it was
failing to deal with. Add test cases around this coercion behavior.
In view of the above, sprinkle the code liberally with castNode() macros,
in hope of catching the next such bug a bit sooner. Also, change some
functions that were randomly declared to take Node* to take more specific
pointer types. And change some struct fields that were declared Node*
but could be given more specific types, allowing removal of assorted
explicit casts.
Place PARTITION_MAX_KEYS check a bit closer to the code it's protecting.
Likewise check only-one-key-for-list-partitioning restriction in a less
random place.
Avoid not-per-project-style usages like !strcmp(...).
Fix assorted failures to avoid scribbling on the input of parse
transformation. I'm not sure how necessary this is, but it's entirely
silly for these functions to be expending cycles to avoid that and not
getting it right.
Add guards against partitioning on system columns.
Put backend/nodes/ support code into an order that matches handling
of these node types elsewhere.
Annotate the fact that somebody added location fields to PartitionBoundSpec
and PartitionRangeDatum but forgot to handle them in
outfuncs.c/readfuncs.c. This is fairly harmless for production purposes
(since readfuncs.c would just substitute -1 anyway) but it's still bogus.
It's not worth forcing a post-beta1 initdb just to fix this, but if we
have another reason to force initdb before 10.0, we should go back and
clean this up.
Contrariwise, somebody added location fields to PartitionElem and
PartitionSpec but forgot to teach exprLocation() about them.
Consolidate duplicative code in transformPartitionBound().
Improve a couple of error messages.
Improve assorted commentary.
Re-pgindent the files touched by this patch; this affects a few comment
blocks that must have been added quite recently.
Report: https://postgr.es/m/20170524024550.29935.14396@wrigleys.postgresql.org
2017-05-29 05:20:28 +02:00
|
|
|
case T_PartitionSpec:
|
|
|
|
_outPartitionSpec(str, obj);
|
|
|
|
break;
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing based which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
case T_PartitionBoundSpec:
|
|
|
|
_outPartitionBoundSpec(str, obj);
|
|
|
|
break;
|
|
|
|
case T_PartitionRangeDatum:
|
|
|
|
_outPartitionRangeDatum(str, obj);
|
|
|
|
break;
|
1999-02-23 09:01:47 +01:00
|
|
|
|
1997-09-08 04:41:22 +02:00
|
|
|
default:
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-07-23 01:30:39 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* This should be an ERROR, but it's too useful to be able to
|
2016-04-08 23:26:36 +02:00
|
|
|
* dump structures that outNode only understands part of.
|
2003-07-23 01:30:39 +02:00
|
|
|
*/
|
|
|
|
elog(WARNING, "could not dump unrecognized node type: %d",
|
|
|
|
(int) nodeTag(obj));
|
1997-09-08 04:41:22 +02:00
|
|
|
break;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2000-01-14 01:53:21 +01:00
|
|
|
appendStringInfoChar(str, '}');
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* nodeToString -
|
1999-04-25 05:19:27 +02:00
|
|
|
* returns the ascii representation of the Node as a palloc'd string
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1998-02-26 05:46:47 +01:00
|
|
|
char *
|
2011-12-07 20:46:56 +01:00
|
|
|
nodeToString(const void *obj)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-05-25 18:15:34 +02:00
|
|
|
StringInfoData str;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-04-25 05:19:27 +02:00
|
|
|
/* see stringinfo.h for an explanation of this maneuver */
|
|
|
|
initStringInfo(&str);
|
2016-04-08 23:26:36 +02:00
|
|
|
outNode(&str, obj);
|
1999-04-25 05:19:27 +02:00
|
|
|
return str.data;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2016-09-16 15:36:19 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* bmsToString -
|
|
|
|
* returns the ascii representation of the Bitmapset as a palloc'd string
|
|
|
|
*/
|
|
|
|
char *
|
|
|
|
bmsToString(const Bitmapset *bms)
|
|
|
|
{
|
|
|
|
StringInfoData str;
|
|
|
|
|
|
|
|
/* see stringinfo.h for an explanation of this maneuver */
|
|
|
|
initStringInfo(&str);
|
|
|
|
outBitmapset(&str, bms);
|
|
|
|
return str.data;
|
|
|
|
}
|