postgresql/src/backend/nodes/outfuncs.c

/*-------------------------------------------------------------------------
*
* outfuncs.c
* Output functions for Postgres tree nodes.
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/nodes/outfuncs.c
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
* have an output function defined here (as well as an input function
* in readfuncs.c). In addition, plan nodes should have input and
* output functions so that they can be sent to parallel workers.
* For use in debugging, we also provide output functions for nodes
* that appear in raw parsetrees and planner Paths.  These nodes however
* need not have input functions.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "lib/stringinfo.h"
#include "nodes/plannodes.h"
#include "nodes/relation.h"
#include "utils/datum.h"
/*
* Macros to simplify output of different kinds of fields. Use these
* wherever possible to reduce the chance for silly typos. Note that these
* hard-wire conventions about the names of the local variables in an Out
* routine.
*/
/* Write the label for the node type */
#define WRITE_NODE_TYPE(nodelabel) \
appendStringInfoString(str, nodelabel)
/* Write an integer field (anything written as ":fldname %d") */
#define WRITE_INT_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname)
/* Write an unsigned integer field (anything written as ":fldname %u") */
#define WRITE_UINT_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)
/* Write an OID field (don't hard-wire assumption that OID is same as uint) */
#define WRITE_OID_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)
/* Write a long-integer field */
#define WRITE_LONG_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %ld", node->fldname)
/* Write a char field (ie, one ascii character) */
#define WRITE_CHAR_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %c", node->fldname)
/* Write an enumerated-type field as an integer code */
#define WRITE_ENUM_FIELD(fldname, enumtype) \
appendStringInfo(str, " :" CppAsString(fldname) " %d", \
(int) node->fldname)
/* Write a float field --- caller must give format to define precision */
#define WRITE_FLOAT_FIELD(fldname,format) \
appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname)
/* Write a boolean field */
#define WRITE_BOOL_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %s", \
booltostr(node->fldname))
/* Write a character-string (possibly NULL) field */
#define WRITE_STRING_FIELD(fldname) \
(appendStringInfo(str, " :" CppAsString(fldname) " "), \
_outToken(str, node->fldname))
/* Write a parse location field (actually same as INT case) */
#define WRITE_LOCATION_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname)
/* Write a Node field */
#define WRITE_NODE_FIELD(fldname) \
(appendStringInfo(str, " :" CppAsString(fldname) " "), \
_outNode(str, node->fldname))
/* Write a bitmapset field */
#define WRITE_BITMAPSET_FIELD(fldname) \
(appendStringInfo(str, " :" CppAsString(fldname) " "), \
_outBitmapset(str, node->fldname))
#define booltostr(x) ((x) ? "true" : "false")
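/*
 * Example (a sketch using a hypothetical node type, not one of the real
 * ones below): a routine written with these macros, say
 *
 *		static void
 *		_outFooNode(StringInfo str, const FooNode *node)
 *		{
 *			WRITE_NODE_TYPE("FOONODE");
 *			WRITE_INT_FIELD(foo);
 *			WRITE_BOOL_FIELD(bar);
 *		}
 *
 * appends text of the form "FOONODE :foo 42 :bar true" to the caller's
 * StringInfo; the enclosing "{...}" braces are added by _outNode(), which
 * dispatches to the per-node routine.
 */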
static void _outNode(StringInfo str, const void *obj);
/*
* _outToken
* Convert an ordinary string (eg, an identifier) into a form that
* will be decoded back to a plain token by read.c's functions.
*
* If a null or empty string is given, it is encoded as "<>".
*/
static void
_outToken(StringInfo str, const char *s)
{
if (s == NULL || *s == '\0')
{
appendStringInfoString(str, "<>");
return;
}
/*
* Look for characters or patterns that are treated specially by read.c
* (either in pg_strtok() or in nodeRead()), and therefore need a
* protective backslash.
*/
/* These characters only need to be quoted at the start of the string */
if (*s == '<' ||
*s == '\"' ||
isdigit((unsigned char) *s) ||
((*s == '+' || *s == '-') &&
(isdigit((unsigned char) s[1]) || s[1] == '.')))
appendStringInfoChar(str, '\\');
while (*s)
{
/* These chars must be backslashed anywhere in the string */
if (*s == ' ' || *s == '\n' || *s == '\t' ||
*s == '(' || *s == ')' || *s == '{' || *s == '}' ||
*s == '\\')
appendStringInfoChar(str, '\\');
appendStringInfoChar(str, *s++);
}
}
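/*
 * For illustration (arbitrary example strings): _outToken(str, "two words")
 * appends "two\ words", _outToken(str, "123abc") appends "\123abc" (the
 * leading backslash keeps read.c from treating it as a number), and
 * _outToken(str, NULL) appends "<>".
 */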
static void
_outList(StringInfo str, const List *node)
{
const ListCell *lc;
appendStringInfoChar(str, '(');
if (IsA(node, IntList))
appendStringInfoChar(str, 'i');
else if (IsA(node, OidList))
appendStringInfoChar(str, 'o');
foreach(lc, node)
{
/*
* For the sake of backward compatibility, we emit a slightly
* different whitespace format for lists of nodes vs. other types of
* lists. XXX: is this necessary?
*/
if (IsA(node, List))
{
_outNode(str, lfirst(lc));
if (lnext(lc))
appendStringInfoChar(str, ' ');
}
else if (IsA(node, IntList))
appendStringInfo(str, " %d", lfirst_int(lc));
else if (IsA(node, OidList))
appendStringInfo(str, " %u", lfirst_oid(lc));
else
elog(ERROR, "unrecognized list node type: %d",
(int) node->type);
}
appendStringInfoChar(str, ')');
}
/*
* _outBitmapset -
* converts a bitmap set of integers
*
* Note: the output format is "(b int int ...)", similar to an integer List.
*/
static void
_outBitmapset(StringInfo str, const Bitmapset *bms)
{
int x;
appendStringInfoChar(str, '(');
appendStringInfoChar(str, 'b');
x = -1;
while ((x = bms_next_member(bms, x)) >= 0)
appendStringInfo(str, " %d", x);
appendStringInfoChar(str, ')');
}
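/*
 * For example, a Bitmapset containing the members 3, 5 and 11 is emitted
 * as "(b 3 5 11)"; an empty or NULL set is emitted as "(b)".
 */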
/*
* Print the value of a Datum given its type.
*/
static void
_outDatum(StringInfo str, Datum value, int typlen, bool typbyval)
{
Size length,
i;
char *s;
length = datumGetSize(value, typbyval, typlen);
if (typbyval)
{
s = (char *) (&value);
appendStringInfo(str, "%u [ ", (unsigned int) length);
for (i = 0; i < (Size) sizeof(Datum); i++)
appendStringInfo(str, "%d ", (int) (s[i]));
appendStringInfoChar(str, ']');
}
else
{
s = (char *) DatumGetPointer(value);
if (!PointerIsValid(s))
appendStringInfoString(str, "0 [ ]");
else
{
appendStringInfo(str, "%u [ ", (unsigned int) length);
for (i = 0; i < length; i++)
appendStringInfo(str, "%d ", (int) (s[i]));
appendStringInfoChar(str, ']');
}
}
}
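/*
 * Illustration (byte values are platform dependent): a pass-by-value int32
 * Datum holding 42 with typlen 4 might print as "4 [ 42 0 0 0 0 0 0 0 ]"
 * on a little-endian 64-bit machine: the declared length, then the raw
 * byte image of the whole Datum.  Because the bytes are dumped verbatim,
 * the representation is not portable across architectures.
 */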
/*
* Stuff from plannodes.h
*/
static void
_outPlannedStmt(StringInfo str, const PlannedStmt *node)
{
WRITE_NODE_TYPE("PLANNEDSTMT");
WRITE_ENUM_FIELD(commandType, CmdType);
WRITE_UINT_FIELD(queryId);
WRITE_BOOL_FIELD(hasReturning);
WRITE_BOOL_FIELD(hasModifyingCTE);
WRITE_BOOL_FIELD(canSetTag);
WRITE_BOOL_FIELD(transientPlan);
WRITE_NODE_FIELD(planTree);
WRITE_NODE_FIELD(rtable);
WRITE_NODE_FIELD(resultRelations);
WRITE_NODE_FIELD(utilityStmt);
WRITE_NODE_FIELD(subplans);
WRITE_BITMAPSET_FIELD(rewindPlanIDs);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(relationOids);
WRITE_NODE_FIELD(invalItems);
WRITE_INT_FIELD(nParamExec);
WRITE_BOOL_FIELD(hasRowSecurity);
WRITE_BOOL_FIELD(parallelModeNeeded);
}
/*
* print the basic stuff of all nodes that inherit from Plan
*/
static void
_outPlanInfo(StringInfo str, const Plan *node)
{
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
WRITE_FLOAT_FIELD(total_cost, "%.2f");
WRITE_FLOAT_FIELD(plan_rows, "%.0f");
WRITE_INT_FIELD(plan_width);
WRITE_BOOL_FIELD(parallel_aware);
WRITE_INT_FIELD(plan_node_id);
WRITE_NODE_FIELD(targetlist);
WRITE_NODE_FIELD(qual);
WRITE_NODE_FIELD(lefttree);
WRITE_NODE_FIELD(righttree);
WRITE_NODE_FIELD(initPlan);
WRITE_BITMAPSET_FIELD(extParam);
WRITE_BITMAPSET_FIELD(allParam);
}
/*
* print the basic stuff of all nodes that inherit from Scan
*/
static void
_outScanInfo(StringInfo str, const Scan *node)
{
_outPlanInfo(str, (const Plan *) node);
WRITE_UINT_FIELD(scanrelid);
}
/*
* print the basic stuff of all nodes that inherit from Join
*/
static void
_outJoinPlanInfo(StringInfo str, const Join *node)
{
_outPlanInfo(str, (const Plan *) node);
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_NODE_FIELD(joinqual);
}
static void
_outPlan(StringInfo str, const Plan *node)
{
WRITE_NODE_TYPE("PLAN");
_outPlanInfo(str, (const Plan *) node);
}
static void
_outResult(StringInfo str, const Result *node)
{
WRITE_NODE_TYPE("RESULT");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(resconstantqual);
}
static void
_outModifyTable(StringInfo str, const ModifyTable *node)
{
WRITE_NODE_TYPE("MODIFYTABLE");
_outPlanInfo(str, (const Plan *) node);
WRITE_ENUM_FIELD(operation, CmdType);
WRITE_BOOL_FIELD(canSetTag);
WRITE_UINT_FIELD(nominalRelation);
WRITE_NODE_FIELD(resultRelations);
WRITE_INT_FIELD(resultRelIndex);
WRITE_NODE_FIELD(plans);
WRITE_NODE_FIELD(withCheckOptionLists);
WRITE_NODE_FIELD(returningLists);
WRITE_NODE_FIELD(fdwPrivLists);
WRITE_NODE_FIELD(rowMarks);
WRITE_INT_FIELD(epqParam);
WRITE_ENUM_FIELD(onConflictAction, OnConflictAction);
WRITE_NODE_FIELD(arbiterIndexes);
WRITE_NODE_FIELD(onConflictSet);
WRITE_NODE_FIELD(onConflictWhere);
WRITE_UINT_FIELD(exclRelRTI);
WRITE_NODE_FIELD(exclRelTlist);
}
static void
_outAppend(StringInfo str, const Append *node)
{
WRITE_NODE_TYPE("APPEND");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(appendplans);
}
static void
_outMergeAppend(StringInfo str, const MergeAppend *node)
{
int i;
WRITE_NODE_TYPE("MERGEAPPEND");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(mergeplans);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :sortColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->sortColIdx[i]);
appendStringInfoString(str, " :sortOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->sortOperators[i]);
appendStringInfoString(str, " :collations");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->collations[i]);
appendStringInfoString(str, " :nullsFirst");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %s", booltostr(node->nullsFirst[i]));
}
static void
_outRecursiveUnion(StringInfo str, const RecursiveUnion *node)
{
int i;
WRITE_NODE_TYPE("RECURSIVEUNION");
_outPlanInfo(str, (const Plan *) node);
WRITE_INT_FIELD(wtParam);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :dupColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->dupColIdx[i]);
appendStringInfoString(str, " :dupOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->dupOperators[i]);
WRITE_LONG_FIELD(numGroups);
}
static void
_outBitmapAnd(StringInfo str, const BitmapAnd *node)
{
WRITE_NODE_TYPE("BITMAPAND");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(bitmapplans);
}
static void
_outBitmapOr(StringInfo str, const BitmapOr *node)
{
WRITE_NODE_TYPE("BITMAPOR");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(bitmapplans);
}
static void
_outGather(StringInfo str, const Gather *node)
{
WRITE_NODE_TYPE("GATHER");
_outPlanInfo(str, (const Plan *) node);
WRITE_INT_FIELD(num_workers);
WRITE_BOOL_FIELD(single_copy);
}
static void
_outScan(StringInfo str, const Scan *node)
{
WRITE_NODE_TYPE("SCAN");
_outScanInfo(str, node);
}
static void
_outSeqScan(StringInfo str, const SeqScan *node)
{
WRITE_NODE_TYPE("SEQSCAN");
_outScanInfo(str, (const Scan *) node);
}
static void
_outSampleScan(StringInfo str, const SampleScan *node)
{
WRITE_NODE_TYPE("SAMPLESCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(tablesample);
}
static void
_outIndexScan(StringInfo str, const IndexScan *node)
{
WRITE_NODE_TYPE("INDEXSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_OID_FIELD(indexid);
WRITE_NODE_FIELD(indexqual);
WRITE_NODE_FIELD(indexqualorig);
WRITE_NODE_FIELD(indexorderby);
WRITE_NODE_FIELD(indexorderbyorig);
WRITE_NODE_FIELD(indexorderbyops);
WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
}
static void
_outIndexOnlyScan(StringInfo str, const IndexOnlyScan *node)
{
WRITE_NODE_TYPE("INDEXONLYSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_OID_FIELD(indexid);
WRITE_NODE_FIELD(indexqual);
WRITE_NODE_FIELD(indexorderby);
WRITE_NODE_FIELD(indextlist);
WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
}
static void
_outBitmapIndexScan(StringInfo str, const BitmapIndexScan *node)
{
WRITE_NODE_TYPE("BITMAPINDEXSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_OID_FIELD(indexid);
WRITE_NODE_FIELD(indexqual);
WRITE_NODE_FIELD(indexqualorig);
}
static void
_outBitmapHeapScan(StringInfo str, const BitmapHeapScan *node)
{
WRITE_NODE_TYPE("BITMAPHEAPSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(bitmapqualorig);
}
static void
_outTidScan(StringInfo str, const TidScan *node)
{
WRITE_NODE_TYPE("TIDSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(tidquals);
}
static void
_outSubqueryScan(StringInfo str, const SubqueryScan *node)
{
WRITE_NODE_TYPE("SUBQUERYSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(subplan);
}
static void
_outFunctionScan(StringInfo str, const FunctionScan *node)
{
WRITE_NODE_TYPE("FUNCTIONSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(functions);
WRITE_BOOL_FIELD(funcordinality);
}
static void
_outValuesScan(StringInfo str, const ValuesScan *node)
{
WRITE_NODE_TYPE("VALUESSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(values_lists);
}
static void
_outCteScan(StringInfo str, const CteScan *node)
{
WRITE_NODE_TYPE("CTESCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_INT_FIELD(ctePlanId);
WRITE_INT_FIELD(cteParam);
}
static void
_outWorkTableScan(StringInfo str, const WorkTableScan *node)
{
WRITE_NODE_TYPE("WORKTABLESCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_INT_FIELD(wtParam);
}
static void
_outForeignScan(StringInfo str, const ForeignScan *node)
{
WRITE_NODE_TYPE("FOREIGNSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_OID_FIELD(fs_server);
WRITE_NODE_FIELD(fdw_exprs);
WRITE_NODE_FIELD(fdw_private);
WRITE_NODE_FIELD(fdw_scan_tlist);
WRITE_NODE_FIELD(fdw_recheck_quals);
WRITE_BITMAPSET_FIELD(fs_relids);
WRITE_BOOL_FIELD(fsSystemCol);
}
static void
_outCustomScan(StringInfo str, const CustomScan *node)
{
WRITE_NODE_TYPE("CUSTOMSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_UINT_FIELD(flags);
WRITE_NODE_FIELD(custom_plans);
WRITE_NODE_FIELD(custom_exprs);
WRITE_NODE_FIELD(custom_private);
WRITE_NODE_FIELD(custom_scan_tlist);
WRITE_BITMAPSET_FIELD(custom_relids);
/* Dump library and symbol name instead of raw pointer */
appendStringInfoString(str, " :methods ");
_outToken(str, node->methods->LibraryName);
appendStringInfoChar(str, ' ');
_outToken(str, node->methods->SymbolName);
}
static void
_outJoin(StringInfo str, const Join *node)
{
WRITE_NODE_TYPE("JOIN");
_outJoinPlanInfo(str, (const Join *) node);
}
static void
_outNestLoop(StringInfo str, const NestLoop *node)
{
WRITE_NODE_TYPE("NESTLOOP");
_outJoinPlanInfo(str, (const Join *) node);
WRITE_NODE_FIELD(nestParams);
}
static void
_outMergeJoin(StringInfo str, const MergeJoin *node)
{
int numCols;
int i;
WRITE_NODE_TYPE("MERGEJOIN");
_outJoinPlanInfo(str, (const Join *) node);
WRITE_NODE_FIELD(mergeclauses);
numCols = list_length(node->mergeclauses);
appendStringInfoString(str, " :mergeFamilies");
for (i = 0; i < numCols; i++)
appendStringInfo(str, " %u", node->mergeFamilies[i]);
appendStringInfoString(str, " :mergeCollations");
for (i = 0; i < numCols; i++)
appendStringInfo(str, " %u", node->mergeCollations[i]);
appendStringInfoString(str, " :mergeStrategies");
for (i = 0; i < numCols; i++)
appendStringInfo(str, " %d", node->mergeStrategies[i]);
appendStringInfoString(str, " :mergeNullsFirst");
for (i = 0; i < numCols; i++)
appendStringInfo(str, " %s", booltostr(node->mergeNullsFirst[i]));
}
static void
_outHashJoin(StringInfo str, const HashJoin *node)
{
WRITE_NODE_TYPE("HASHJOIN");
_outJoinPlanInfo(str, (const Join *) node);
WRITE_NODE_FIELD(hashclauses);
}
static void
_outAgg(StringInfo str, const Agg *node)
{
int i;
WRITE_NODE_TYPE("AGG");
_outPlanInfo(str, (const Plan *) node);
WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :grpColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->grpColIdx[i]);
appendStringInfoString(str, " :grpOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->grpOperators[i]);
WRITE_LONG_FIELD(numGroups);
WRITE_NODE_FIELD(groupingSets);
WRITE_NODE_FIELD(chain);
}
static void
_outWindowAgg(StringInfo str, const WindowAgg *node)
{
int i;
WRITE_NODE_TYPE("WINDOWAGG");
_outPlanInfo(str, (const Plan *) node);
WRITE_UINT_FIELD(winref);
WRITE_INT_FIELD(partNumCols);
appendStringInfoString(str, " :partColIdx");
for (i = 0; i < node->partNumCols; i++)
appendStringInfo(str, " %d", node->partColIdx[i]);
appendStringInfoString(str, " :partOperations");
for (i = 0; i < node->partNumCols; i++)
appendStringInfo(str, " %u", node->partOperators[i]);
WRITE_INT_FIELD(ordNumCols);
appendStringInfoString(str, " :ordColIdx");
for (i = 0; i < node->ordNumCols; i++)
appendStringInfo(str, " %d", node->ordColIdx[i]);
appendStringInfoString(str, " :ordOperations");
for (i = 0; i < node->ordNumCols; i++)
appendStringInfo(str, " %u", node->ordOperators[i]);
WRITE_INT_FIELD(frameOptions);
WRITE_NODE_FIELD(startOffset);
WRITE_NODE_FIELD(endOffset);
}
static void
_outGroup(StringInfo str, const Group *node)
{
int i;
WRITE_NODE_TYPE("GROUP");
_outPlanInfo(str, (const Plan *) node);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :grpColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->grpColIdx[i]);
appendStringInfoString(str, " :grpOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->grpOperators[i]);
}
static void
_outMaterial(StringInfo str, const Material *node)
{
WRITE_NODE_TYPE("MATERIAL");
_outPlanInfo(str, (const Plan *) node);
}
static void
_outSort(StringInfo str, const Sort *node)
{
int i;
WRITE_NODE_TYPE("SORT");
_outPlanInfo(str, (const Plan *) node);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :sortColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->sortColIdx[i]);
appendStringInfoString(str, " :sortOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->sortOperators[i]);
appendStringInfoString(str, " :collations");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->collations[i]);
appendStringInfoString(str, " :nullsFirst");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %s", booltostr(node->nullsFirst[i]));
}
static void
_outUnique(StringInfo str, const Unique *node)
{
int i;
WRITE_NODE_TYPE("UNIQUE");
_outPlanInfo(str, (const Plan *) node);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :uniqColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->uniqColIdx[i]);
appendStringInfoString(str, " :uniqOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->uniqOperators[i]);
}
static void
_outHash(StringInfo str, const Hash *node)
{
WRITE_NODE_TYPE("HASH");
_outPlanInfo(str, (const Plan *) node);
WRITE_OID_FIELD(skewTable);
WRITE_INT_FIELD(skewColumn);
WRITE_BOOL_FIELD(skewInherit);
WRITE_OID_FIELD(skewColType);
WRITE_INT_FIELD(skewColTypmod);
}
static void
_outSetOp(StringInfo str, const SetOp *node)
{
int i;
WRITE_NODE_TYPE("SETOP");
_outPlanInfo(str, (const Plan *) node);
WRITE_ENUM_FIELD(cmd, SetOpCmd);
WRITE_ENUM_FIELD(strategy, SetOpStrategy);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :dupColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->dupColIdx[i]);
appendStringInfoString(str, " :dupOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->dupOperators[i]);
WRITE_INT_FIELD(flagColIdx);
WRITE_INT_FIELD(firstFlag);
WRITE_LONG_FIELD(numGroups);
}
static void
_outLockRows(StringInfo str, const LockRows *node)
{
WRITE_NODE_TYPE("LOCKROWS");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(rowMarks);
WRITE_INT_FIELD(epqParam);
}
static void
_outLimit(StringInfo str, const Limit *node)
{
WRITE_NODE_TYPE("LIMIT");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(limitOffset);
WRITE_NODE_FIELD(limitCount);
}
static void
_outNestLoopParam(StringInfo str, const NestLoopParam *node)
{
WRITE_NODE_TYPE("NESTLOOPPARAM");
WRITE_INT_FIELD(paramno);
WRITE_NODE_FIELD(paramval);
}
static void
_outPlanRowMark(StringInfo str, const PlanRowMark *node)
{
WRITE_NODE_TYPE("PLANROWMARK");
WRITE_UINT_FIELD(rti);
WRITE_UINT_FIELD(prti);
WRITE_UINT_FIELD(rowmarkId);
WRITE_ENUM_FIELD(markType, RowMarkType);
WRITE_INT_FIELD(allMarkTypes);
WRITE_ENUM_FIELD(strength, LockClauseStrength);
WRITE_ENUM_FIELD(waitPolicy, LockWaitPolicy);
WRITE_BOOL_FIELD(isParent);
}
static void
_outPlanInvalItem(StringInfo str, const PlanInvalItem *node)
{
WRITE_NODE_TYPE("PLANINVALITEM");
WRITE_INT_FIELD(cacheId);
WRITE_UINT_FIELD(hashValue);
}
/*****************************************************************************
*
* Stuff from primnodes.h.
*
*****************************************************************************/
static void
_outAlias(StringInfo str, const Alias *node)
{
WRITE_NODE_TYPE("ALIAS");
WRITE_STRING_FIELD(aliasname);
WRITE_NODE_FIELD(colnames);
}
static void
_outRangeVar(StringInfo str, const RangeVar *node)
{
WRITE_NODE_TYPE("RANGEVAR");
/*
* we deliberately ignore catalogname here, since it is presently not
* semantically meaningful
*/
WRITE_STRING_FIELD(schemaname);
WRITE_STRING_FIELD(relname);
WRITE_ENUM_FIELD(inhOpt, InhOption);
WRITE_CHAR_FIELD(relpersistence);
WRITE_NODE_FIELD(alias);
WRITE_LOCATION_FIELD(location);
}
static void
_outIntoClause(StringInfo str, const IntoClause *node)
{
WRITE_NODE_TYPE("INTOCLAUSE");
WRITE_NODE_FIELD(rel);
WRITE_NODE_FIELD(colNames);
WRITE_NODE_FIELD(options);
WRITE_ENUM_FIELD(onCommit, OnCommitAction);
WRITE_STRING_FIELD(tableSpaceName);
WRITE_NODE_FIELD(viewQuery);
WRITE_BOOL_FIELD(skipData);
}
static void
_outVar(StringInfo str, const Var *node)
{
WRITE_NODE_TYPE("VAR");
WRITE_UINT_FIELD(varno);
WRITE_INT_FIELD(varattno);
WRITE_OID_FIELD(vartype);
WRITE_INT_FIELD(vartypmod);
WRITE_OID_FIELD(varcollid);
WRITE_UINT_FIELD(varlevelsup);
WRITE_UINT_FIELD(varnoold);
WRITE_INT_FIELD(varoattno);
WRITE_LOCATION_FIELD(location);
}
static void
_outConst(StringInfo str, const Const *node)
{
WRITE_NODE_TYPE("CONST");
WRITE_OID_FIELD(consttype);
WRITE_INT_FIELD(consttypmod);
WRITE_OID_FIELD(constcollid);
WRITE_INT_FIELD(constlen);
WRITE_BOOL_FIELD(constbyval);
WRITE_BOOL_FIELD(constisnull);
WRITE_LOCATION_FIELD(location);
appendStringInfoString(str, " :constvalue ");
if (node->constisnull)
appendStringInfoString(str, "<>");
else
_outDatum(str, node->constvalue, node->constlen, node->constbyval);
}
static void
_outParam(StringInfo str, const Param *node)
{
WRITE_NODE_TYPE("PARAM");
WRITE_ENUM_FIELD(paramkind, ParamKind);
WRITE_INT_FIELD(paramid);
WRITE_OID_FIELD(paramtype);
WRITE_INT_FIELD(paramtypmod);
WRITE_OID_FIELD(paramcollid);
WRITE_LOCATION_FIELD(location);
}
static void
_outAggref(StringInfo str, const Aggref *node)
{
WRITE_NODE_TYPE("AGGREF");
WRITE_OID_FIELD(aggfnoid);
WRITE_OID_FIELD(aggtype);
WRITE_OID_FIELD(aggcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(aggdirectargs);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(aggorder);
WRITE_NODE_FIELD(aggdistinct);
WRITE_NODE_FIELD(aggfilter);
WRITE_BOOL_FIELD(aggstar);
WRITE_BOOL_FIELD(aggvariadic);
WRITE_CHAR_FIELD(aggkind);
WRITE_UINT_FIELD(agglevelsup);
WRITE_LOCATION_FIELD(location);
}
static void
_outGroupingFunc(StringInfo str, const GroupingFunc *node)
{
WRITE_NODE_TYPE("GROUPINGFUNC");
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(refs);
WRITE_NODE_FIELD(cols);
WRITE_UINT_FIELD(agglevelsup);
WRITE_LOCATION_FIELD(location);
}
static void
_outWindowFunc(StringInfo str, const WindowFunc *node)
{
WRITE_NODE_TYPE("WINDOWFUNC");
WRITE_OID_FIELD(winfnoid);
WRITE_OID_FIELD(wintype);
WRITE_OID_FIELD(wincollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(aggfilter);
WRITE_UINT_FIELD(winref);
WRITE_BOOL_FIELD(winstar);
WRITE_BOOL_FIELD(winagg);
WRITE_LOCATION_FIELD(location);
}
static void
_outArrayRef(StringInfo str, const ArrayRef *node)
{
WRITE_NODE_TYPE("ARRAYREF");
WRITE_OID_FIELD(refarraytype);
WRITE_OID_FIELD(refelemtype);
WRITE_INT_FIELD(reftypmod);
WRITE_OID_FIELD(refcollid);
WRITE_NODE_FIELD(refupperindexpr);
WRITE_NODE_FIELD(reflowerindexpr);
WRITE_NODE_FIELD(refexpr);
WRITE_NODE_FIELD(refassgnexpr);
}
static void
_outFuncExpr(StringInfo str, const FuncExpr *node)
{
WRITE_NODE_TYPE("FUNCEXPR");
WRITE_OID_FIELD(funcid);
WRITE_OID_FIELD(funcresulttype);
WRITE_BOOL_FIELD(funcretset);
WRITE_BOOL_FIELD(funcvariadic);
WRITE_ENUM_FIELD(funcformat, CoercionForm);
WRITE_OID_FIELD(funccollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
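/*
 * Illustrative sketch (added here for readability, not captured from a real
 * run): each routine in this file emits its node-type label followed by
 * " :fieldname value" pairs, so a FuncExpr would serialize along the lines of
 *
 *     {FUNCEXPR :funcid 123 :funcresulttype 23 :funcretset false ... :location -1}
 *
 * The concrete OIDs and the enclosing braces depend on the node being dumped
 * and on the caller that drives the traversal.
 */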
static void
_outNamedArgExpr(StringInfo str, const NamedArgExpr *node)
{
WRITE_NODE_TYPE("NAMEDARGEXPR");
WRITE_NODE_FIELD(arg);
WRITE_STRING_FIELD(name);
WRITE_INT_FIELD(argnumber);
WRITE_LOCATION_FIELD(location);
}
static void
_outOpExpr(StringInfo str, const OpExpr *node)
{
WRITE_NODE_TYPE("OPEXPR");
WRITE_OID_FIELD(opno);
WRITE_OID_FIELD(opfuncid);
WRITE_OID_FIELD(opresulttype);
WRITE_BOOL_FIELD(opretset);
WRITE_OID_FIELD(opcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outDistinctExpr(StringInfo str, const DistinctExpr *node)
{
WRITE_NODE_TYPE("DISTINCTEXPR");
WRITE_OID_FIELD(opno);
WRITE_OID_FIELD(opfuncid);
WRITE_OID_FIELD(opresulttype);
WRITE_BOOL_FIELD(opretset);
WRITE_OID_FIELD(opcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outNullIfExpr(StringInfo str, const NullIfExpr *node)
{
WRITE_NODE_TYPE("NULLIFEXPR");
WRITE_OID_FIELD(opno);
WRITE_OID_FIELD(opfuncid);
WRITE_OID_FIELD(opresulttype);
WRITE_BOOL_FIELD(opretset);
WRITE_OID_FIELD(opcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outScalarArrayOpExpr(StringInfo str, const ScalarArrayOpExpr *node)
{
WRITE_NODE_TYPE("SCALARARRAYOPEXPR");
WRITE_OID_FIELD(opno);
WRITE_OID_FIELD(opfuncid);
WRITE_BOOL_FIELD(useOr);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outBoolExpr(StringInfo str, const BoolExpr *node)
{
char *opstr = NULL;
WRITE_NODE_TYPE("BOOLEXPR");
/* do-it-yourself enum representation */
switch (node->boolop)
{
case AND_EXPR:
opstr = "and";
break;
case OR_EXPR:
opstr = "or";
break;
case NOT_EXPR:
opstr = "not";
break;
}
appendStringInfoString(str, " :boolop ");
_outToken(str, opstr);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
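/*
 * For example (sketch, not verified output): because of the hand-rolled
 * boolop handling above, an AND expression contributes " :boolop and" to the
 * dump, i.e. a readable word rather than the numeric code an enum field would
 * otherwise get.
 */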
static void
_outSubLink(StringInfo str, const SubLink *node)
{
WRITE_NODE_TYPE("SUBLINK");
WRITE_ENUM_FIELD(subLinkType, SubLinkType);
WRITE_INT_FIELD(subLinkId);
WRITE_NODE_FIELD(testexpr);
WRITE_NODE_FIELD(operName);
WRITE_NODE_FIELD(subselect);
WRITE_LOCATION_FIELD(location);
}
static void
_outSubPlan(StringInfo str, const SubPlan *node)
{
WRITE_NODE_TYPE("SUBPLAN");
WRITE_ENUM_FIELD(subLinkType, SubLinkType);
WRITE_NODE_FIELD(testexpr);
WRITE_NODE_FIELD(paramIds);
WRITE_INT_FIELD(plan_id);
WRITE_STRING_FIELD(plan_name);
WRITE_OID_FIELD(firstColType);
WRITE_INT_FIELD(firstColTypmod);
WRITE_OID_FIELD(firstColCollation);
WRITE_BOOL_FIELD(useHashTable);
WRITE_BOOL_FIELD(unknownEqFalse);
WRITE_NODE_FIELD(setParam);
WRITE_NODE_FIELD(parParam);
WRITE_NODE_FIELD(args);
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
WRITE_FLOAT_FIELD(per_call_cost, "%.2f");
}
static void
_outAlternativeSubPlan(StringInfo str, const AlternativeSubPlan *node)
{
WRITE_NODE_TYPE("ALTERNATIVESUBPLAN");
WRITE_NODE_FIELD(subplans);
}
static void
_outFieldSelect(StringInfo str, const FieldSelect *node)
{
WRITE_NODE_TYPE("FIELDSELECT");
WRITE_NODE_FIELD(arg);
WRITE_INT_FIELD(fieldnum);
WRITE_OID_FIELD(resulttype);
WRITE_INT_FIELD(resulttypmod);
WRITE_OID_FIELD(resultcollid);
}
static void
_outFieldStore(StringInfo str, const FieldStore *node)
{
WRITE_NODE_TYPE("FIELDSTORE");
WRITE_NODE_FIELD(arg);
WRITE_NODE_FIELD(newvals);
WRITE_NODE_FIELD(fieldnums);
WRITE_OID_FIELD(resulttype);
}
static void
_outRelabelType(StringInfo str, const RelabelType *node)
{
WRITE_NODE_TYPE("RELABELTYPE");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(resulttype);
WRITE_INT_FIELD(resulttypmod);
WRITE_OID_FIELD(resultcollid);
WRITE_ENUM_FIELD(relabelformat, CoercionForm);
WRITE_LOCATION_FIELD(location);
}
static void
_outCoerceViaIO(StringInfo str, const CoerceViaIO *node)
{
WRITE_NODE_TYPE("COERCEVIAIO");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(resulttype);
WRITE_OID_FIELD(resultcollid);
WRITE_ENUM_FIELD(coerceformat, CoercionForm);
WRITE_LOCATION_FIELD(location);
}
static void
_outArrayCoerceExpr(StringInfo str, const ArrayCoerceExpr *node)
{
WRITE_NODE_TYPE("ARRAYCOERCEEXPR");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(elemfuncid);
WRITE_OID_FIELD(resulttype);
WRITE_INT_FIELD(resulttypmod);
WRITE_OID_FIELD(resultcollid);
WRITE_BOOL_FIELD(isExplicit);
WRITE_ENUM_FIELD(coerceformat, CoercionForm);
WRITE_LOCATION_FIELD(location);
}
static void
_outConvertRowtypeExpr(StringInfo str, const ConvertRowtypeExpr *node)
{
WRITE_NODE_TYPE("CONVERTROWTYPEEXPR");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(resulttype);
WRITE_ENUM_FIELD(convertformat, CoercionForm);
WRITE_LOCATION_FIELD(location);
}
static void
_outCollateExpr(StringInfo str, const CollateExpr *node)
{
WRITE_NODE_TYPE("COLLATE");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(collOid);
WRITE_LOCATION_FIELD(location);
}
static void
_outCaseExpr(StringInfo str, const CaseExpr *node)
{
WRITE_NODE_TYPE("CASE");
WRITE_OID_FIELD(casetype);
WRITE_OID_FIELD(casecollid);
WRITE_NODE_FIELD(arg);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(defresult);
WRITE_LOCATION_FIELD(location);
}
static void
_outCaseWhen(StringInfo str, const CaseWhen *node)
{
WRITE_NODE_TYPE("WHEN");
WRITE_NODE_FIELD(expr);
WRITE_NODE_FIELD(result);
WRITE_LOCATION_FIELD(location);
}
static void
_outCaseTestExpr(StringInfo str, const CaseTestExpr *node)
{
WRITE_NODE_TYPE("CASETESTEXPR");
WRITE_OID_FIELD(typeId);
WRITE_INT_FIELD(typeMod);
WRITE_OID_FIELD(collation);
}
static void
_outArrayExpr(StringInfo str, const ArrayExpr *node)
{
WRITE_NODE_TYPE("ARRAY");
WRITE_OID_FIELD(array_typeid);
WRITE_OID_FIELD(array_collid);
WRITE_OID_FIELD(element_typeid);
WRITE_NODE_FIELD(elements);
WRITE_BOOL_FIELD(multidims);
WRITE_LOCATION_FIELD(location);
}
static void
_outRowExpr(StringInfo str, const RowExpr *node)
{
WRITE_NODE_TYPE("ROW");
WRITE_NODE_FIELD(args);
WRITE_OID_FIELD(row_typeid);
WRITE_ENUM_FIELD(row_format, CoercionForm);
WRITE_NODE_FIELD(colnames);
WRITE_LOCATION_FIELD(location);
}
static void
_outRowCompareExpr(StringInfo str, const RowCompareExpr *node)
{
WRITE_NODE_TYPE("ROWCOMPARE");
WRITE_ENUM_FIELD(rctype, RowCompareType);
WRITE_NODE_FIELD(opnos);
WRITE_NODE_FIELD(opfamilies);
WRITE_NODE_FIELD(inputcollids);
WRITE_NODE_FIELD(largs);
WRITE_NODE_FIELD(rargs);
}
static void
_outCoalesceExpr(StringInfo str, const CoalesceExpr *node)
{
WRITE_NODE_TYPE("COALESCE");
WRITE_OID_FIELD(coalescetype);
WRITE_OID_FIELD(coalescecollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outMinMaxExpr(StringInfo str, const MinMaxExpr *node)
{
WRITE_NODE_TYPE("MINMAX");
WRITE_OID_FIELD(minmaxtype);
WRITE_OID_FIELD(minmaxcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_ENUM_FIELD(op, MinMaxOp);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outXmlExpr(StringInfo str, const XmlExpr *node)
{
WRITE_NODE_TYPE("XMLEXPR");
WRITE_ENUM_FIELD(op, XmlExprOp);
WRITE_STRING_FIELD(name);
WRITE_NODE_FIELD(named_args);
WRITE_NODE_FIELD(arg_names);
WRITE_NODE_FIELD(args);
WRITE_ENUM_FIELD(xmloption, XmlOptionType);
WRITE_OID_FIELD(type);
WRITE_INT_FIELD(typmod);
WRITE_LOCATION_FIELD(location);
}
static void
_outNullTest(StringInfo str, const NullTest *node)
{
WRITE_NODE_TYPE("NULLTEST");
WRITE_NODE_FIELD(arg);
WRITE_ENUM_FIELD(nulltesttype, NullTestType);
WRITE_BOOL_FIELD(argisrow);
WRITE_LOCATION_FIELD(location);
}
static void
_outBooleanTest(StringInfo str, const BooleanTest *node)
{
WRITE_NODE_TYPE("BOOLEANTEST");
WRITE_NODE_FIELD(arg);
WRITE_ENUM_FIELD(booltesttype, BoolTestType);
WRITE_LOCATION_FIELD(location);
}
static void
_outCoerceToDomain(StringInfo str, const CoerceToDomain *node)
{
WRITE_NODE_TYPE("COERCETODOMAIN");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(resulttype);
WRITE_INT_FIELD(resulttypmod);
WRITE_OID_FIELD(resultcollid);
WRITE_ENUM_FIELD(coercionformat, CoercionForm);
WRITE_LOCATION_FIELD(location);
}
static void
_outCoerceToDomainValue(StringInfo str, const CoerceToDomainValue *node)
{
WRITE_NODE_TYPE("COERCETODOMAINVALUE");
WRITE_OID_FIELD(typeId);
WRITE_INT_FIELD(typeMod);
WRITE_OID_FIELD(collation);
WRITE_LOCATION_FIELD(location);
}
static void
_outSetToDefault(StringInfo str, const SetToDefault *node)
{
WRITE_NODE_TYPE("SETTODEFAULT");
WRITE_OID_FIELD(typeId);
WRITE_INT_FIELD(typeMod);
WRITE_OID_FIELD(collation);
WRITE_LOCATION_FIELD(location);
}
static void
_outCurrentOfExpr(StringInfo str, const CurrentOfExpr *node)
{
WRITE_NODE_TYPE("CURRENTOFEXPR");
WRITE_UINT_FIELD(cvarno);
WRITE_STRING_FIELD(cursor_name);
WRITE_INT_FIELD(cursor_param);
}
static void
_outInferenceElem(StringInfo str, const InferenceElem *node)
{
WRITE_NODE_TYPE("INFERENCEELEM");
WRITE_NODE_FIELD(expr);
WRITE_OID_FIELD(infercollid);
WRITE_OID_FIELD(inferopclass);
}
static void
_outTargetEntry(StringInfo str, const TargetEntry *node)
{
WRITE_NODE_TYPE("TARGETENTRY");
WRITE_NODE_FIELD(expr);
WRITE_INT_FIELD(resno);
WRITE_STRING_FIELD(resname);
WRITE_UINT_FIELD(ressortgroupref);
WRITE_OID_FIELD(resorigtbl);
WRITE_INT_FIELD(resorigcol);
WRITE_BOOL_FIELD(resjunk);
}
static void
_outRangeTblRef(StringInfo str, const RangeTblRef *node)
{
WRITE_NODE_TYPE("RANGETBLREF");
WRITE_INT_FIELD(rtindex);
}
static void
_outJoinExpr(StringInfo str, const JoinExpr *node)
{
WRITE_NODE_TYPE("JOINEXPR");
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_BOOL_FIELD(isNatural);
WRITE_NODE_FIELD(larg);
WRITE_NODE_FIELD(rarg);
WRITE_NODE_FIELD(usingClause);
WRITE_NODE_FIELD(quals);
WRITE_NODE_FIELD(alias);
WRITE_INT_FIELD(rtindex);
}
static void
_outFromExpr(StringInfo str, const FromExpr *node)
{
WRITE_NODE_TYPE("FROMEXPR");
WRITE_NODE_FIELD(fromlist);
WRITE_NODE_FIELD(quals);
}
static void
_outOnConflictExpr(StringInfo str, const OnConflictExpr *node)
{
WRITE_NODE_TYPE("ONCONFLICTEXPR");
WRITE_ENUM_FIELD(action, OnConflictAction);
WRITE_NODE_FIELD(arbiterElems);
WRITE_NODE_FIELD(arbiterWhere);
WRITE_OID_FIELD(constraint);
WRITE_NODE_FIELD(onConflictSet);
WRITE_NODE_FIELD(onConflictWhere);
WRITE_INT_FIELD(exclRelIndex);
WRITE_NODE_FIELD(exclRelTlist);
}
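/*
 * Usage sketch (hypothetical, added for illustration): the expression output
 * routines above are typically reached via nodeToString(), so a debugging
 * caller elsewhere in the backend could dump any node tree roughly as below.
 * The function and variable names are invented, not part of this file.
 */
#ifdef OUTFUNCS_USAGE_SKETCH
static void
debug_dump_node(const void *any_node)
{
	/* serialize the node tree into a palloc'd string and log it */
	char	   *serialized = nodeToString(any_node);

	elog(LOG, "node dump: %s", serialized);
	pfree(serialized);
}
#endif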
/*****************************************************************************
*
* Stuff from relation.h.
*
*****************************************************************************/
/*
* print the basic stuff of all nodes that inherit from Path
*
* Note we do NOT print the parent, else we'd be in infinite recursion.
* We can print the parent's relids for identification purposes, though.
* We also do not print the whole of param_info, since it's printed by
* _outRelOptInfo; it's sufficient and less cluttering to print just the
* required outer relids.
*/
static void
_outPathInfo(StringInfo str, const Path *node)
{
WRITE_ENUM_FIELD(pathtype, NodeTag);
appendStringInfoString(str, " :parent_relids ");
if (node->parent)
_outBitmapset(str, node->parent->relids);
else
_outBitmapset(str, NULL);
appendStringInfoString(str, " :required_outer ");
if (node->param_info)
_outBitmapset(str, node->param_info->ppi_req_outer);
else
_outBitmapset(str, NULL);
WRITE_BOOL_FIELD(parallel_aware);
WRITE_FLOAT_FIELD(rows, "%.0f");
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
WRITE_FLOAT_FIELD(total_cost, "%.2f");
WRITE_NODE_FIELD(pathkeys);
}
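/*
 * Rough example (assumed formatting, not captured output): for a path whose
 * parent relation covers rels 1 and 2 and which requires no outer rels, the
 * code above would emit something like
 *
 *     :parent_relids (b 1 2) :required_outer (b)
 *
 * followed by the row-count, cost and pathkey fields.
 */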
/*
* print the basic stuff of all nodes that inherit from JoinPath
*/
static void
_outJoinPathInfo(StringInfo str, const JoinPath *node)
{
_outPathInfo(str, (const Path *) node);
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_NODE_FIELD(outerjoinpath);
WRITE_NODE_FIELD(innerjoinpath);
WRITE_NODE_FIELD(joinrestrictinfo);
}
static void
_outPath(StringInfo str, const Path *node)
{
WRITE_NODE_TYPE("PATH");
_outPathInfo(str, (const Path *) node);
}
static void
_outIndexPath(StringInfo str, const IndexPath *node)
{
WRITE_NODE_TYPE("INDEXPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(indexinfo);
WRITE_NODE_FIELD(indexclauses);
WRITE_NODE_FIELD(indexquals);
WRITE_NODE_FIELD(indexqualcols);
WRITE_NODE_FIELD(indexorderbys);
WRITE_NODE_FIELD(indexorderbycols);
WRITE_ENUM_FIELD(indexscandir, ScanDirection);
WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
}
static void
_outBitmapHeapPath(StringInfo str, const BitmapHeapPath *node)
{
WRITE_NODE_TYPE("BITMAPHEAPPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(bitmapqual);
}
static void
_outBitmapAndPath(StringInfo str, const BitmapAndPath *node)
{
WRITE_NODE_TYPE("BITMAPANDPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(bitmapquals);
WRITE_FLOAT_FIELD(bitmapselectivity, "%.4f");
}
static void
_outBitmapOrPath(StringInfo str, const BitmapOrPath *node)
{
WRITE_NODE_TYPE("BITMAPORPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(bitmapquals);
WRITE_FLOAT_FIELD(bitmapselectivity, "%.4f");
}
static void
_outTidPath(StringInfo str, const TidPath *node)
{
WRITE_NODE_TYPE("TIDPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(tidquals);
}
static void
_outForeignPath(StringInfo str, const ForeignPath *node)
{
WRITE_NODE_TYPE("FOREIGNPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(fdw_outerpath);
WRITE_NODE_FIELD(fdw_private);
}
static void
_outCustomPath(StringInfo str, const CustomPath *node)
{
WRITE_NODE_TYPE("CUSTOMPATH");
_outPathInfo(str, (const Path *) node);
WRITE_UINT_FIELD(flags);
WRITE_NODE_FIELD(custom_paths);
WRITE_NODE_FIELD(custom_private);
appendStringInfoString(str, " :methods ");
_outToken(str, node->methods->CustomName);
if (node->methods->TextOutCustomPath)
node->methods->TextOutCustomPath(str, node);
}
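/*
 * Provider-side sketch (hypothetical): a custom-path provider that wants
 * extra fields in the dump can supply the TextOutCustomPath callback invoked
 * above; its expected signature follows from that call site.  The names
 * below are invented for illustration only.
 */
#ifdef OUTFUNCS_CUSTOM_SKETCH
static void
example_text_out_custom_path(StringInfo str, const CustomPath *node)
{
	/* append provider-specific fields in the same " :name value" style */
	appendStringInfoString(str, " :example_private_flag true");
}
#endif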
static void
_outAppendPath(StringInfo str, const AppendPath *node)
{
WRITE_NODE_TYPE("APPENDPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpaths);
}
static void
_outMergeAppendPath(StringInfo str, const MergeAppendPath *node)
{
WRITE_NODE_TYPE("MERGEAPPENDPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpaths);
WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
}
static void
_outResultPath(StringInfo str, const ResultPath *node)
{
WRITE_NODE_TYPE("RESULTPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(quals);
}
static void
_outMaterialPath(StringInfo str, const MaterialPath *node)
{
WRITE_NODE_TYPE("MATERIALPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
}
static void
_outUniquePath(StringInfo str, const UniquePath *node)
{
WRITE_NODE_TYPE("UNIQUEPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_ENUM_FIELD(umethod, UniquePathMethod);
WRITE_NODE_FIELD(in_operators);
WRITE_NODE_FIELD(uniq_exprs);
}
static void
_outGatherPath(StringInfo str, const GatherPath *node)
{
WRITE_NODE_TYPE("GATHERPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_INT_FIELD(num_workers);
WRITE_BOOL_FIELD(single_copy);
}
static void
_outNestPath(StringInfo str, const NestPath *node)
{
WRITE_NODE_TYPE("NESTPATH");
_outJoinPathInfo(str, (const JoinPath *) node);
}
static void
_outMergePath(StringInfo str, const MergePath *node)
{
WRITE_NODE_TYPE("MERGEPATH");
_outJoinPathInfo(str, (const JoinPath *) node);
WRITE_NODE_FIELD(path_mergeclauses);
WRITE_NODE_FIELD(outersortkeys);
WRITE_NODE_FIELD(innersortkeys);
WRITE_BOOL_FIELD(materialize_inner);
}
static void
_outHashPath(StringInfo str, const HashPath *node)
{
WRITE_NODE_TYPE("HASHPATH");
_outJoinPathInfo(str, (const JoinPath *) node);
WRITE_NODE_FIELD(path_hashclauses);
WRITE_INT_FIELD(num_batches);
}
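/*
 * Pattern sketch (hypothetical, not part of the file): an output routine for
 * a new path type would follow the same shape as the functions above --
 * write the type label, let _outPathInfo() handle the common Path fields,
 * then write any node-specific fields.  "ExamplePath" and its fields are
 * invented purely for illustration.
 */
#ifdef OUTFUNCS_PATTERN_SKETCH
static void
_outExamplePath(StringInfo str, const ExamplePath *node)
{
	WRITE_NODE_TYPE("EXAMPLEPATH");

	_outPathInfo(str, (const Path *) node);

	WRITE_NODE_FIELD(subpath);
	WRITE_BOOL_FIELD(example_flag);
}
#endif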
static void
_outPlannerGlobal(StringInfo str, const PlannerGlobal *node)
{
WRITE_NODE_TYPE("PLANNERGLOBAL");
/* NB: this isn't a complete set of fields */
WRITE_NODE_FIELD(subplans);
WRITE_BITMAPSET_FIELD(rewindPlanIDs);
WRITE_NODE_FIELD(finalrtable);
WRITE_NODE_FIELD(finalrowmarks);
WRITE_NODE_FIELD(resultRelations);
WRITE_NODE_FIELD(relationOids);
WRITE_NODE_FIELD(invalItems);
WRITE_INT_FIELD(nParamExec);
WRITE_UINT_FIELD(lastPHId);
WRITE_UINT_FIELD(lastRowMarkId);
WRITE_BOOL_FIELD(transientPlan);
WRITE_BOOL_FIELD(hasRowSecurity);
WRITE_BOOL_FIELD(parallelModeOK);
WRITE_BOOL_FIELD(parallelModeNeeded);
}
static void
_outPlannerInfo(StringInfo str, const PlannerInfo *node)
{
WRITE_NODE_TYPE("PLANNERINFO");
/* NB: this isn't a complete set of fields */
WRITE_NODE_FIELD(parse);
WRITE_NODE_FIELD(glob);
WRITE_UINT_FIELD(query_level);
WRITE_NODE_FIELD(plan_params);
WRITE_BITMAPSET_FIELD(outer_params);
WRITE_BITMAPSET_FIELD(all_baserels);
WRITE_BITMAPSET_FIELD(nullable_baserels);
WRITE_NODE_FIELD(join_rel_list);
WRITE_INT_FIELD(join_cur_level);
WRITE_NODE_FIELD(init_plans);
WRITE_NODE_FIELD(cte_plan_ids);
WRITE_NODE_FIELD(multiexpr_params);
WRITE_NODE_FIELD(eq_classes);
WRITE_NODE_FIELD(canon_pathkeys);
WRITE_NODE_FIELD(left_join_clauses);
WRITE_NODE_FIELD(right_join_clauses);
WRITE_NODE_FIELD(full_join_clauses);
WRITE_NODE_FIELD(join_info_list);
WRITE_NODE_FIELD(append_rel_list);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(placeholder_list);
WRITE_NODE_FIELD(query_pathkeys);
WRITE_NODE_FIELD(group_pathkeys);
WRITE_NODE_FIELD(window_pathkeys);
WRITE_NODE_FIELD(distinct_pathkeys);
WRITE_NODE_FIELD(sort_pathkeys);
WRITE_NODE_FIELD(minmax_aggs);
WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
WRITE_BOOL_FIELD(hasInheritedTarget);
WRITE_BOOL_FIELD(hasJoinRTEs);
WRITE_BOOL_FIELD(hasLateralRTEs);
WRITE_BOOL_FIELD(hasDeletedRTEs);
WRITE_BOOL_FIELD(hasHavingQual);
WRITE_BOOL_FIELD(hasPseudoConstantQuals);
WRITE_BOOL_FIELD(hasRecursion);
WRITE_INT_FIELD(wt_param_id);
WRITE_BITMAPSET_FIELD(curOuterRels);
WRITE_NODE_FIELD(curOuterParams);
}
static void
_outRelOptInfo(StringInfo str, const RelOptInfo *node)
{
WRITE_NODE_TYPE("RELOPTINFO");
/* NB: this isn't a complete set of fields */
WRITE_ENUM_FIELD(reloptkind, RelOptKind);
WRITE_BITMAPSET_FIELD(relids);
WRITE_FLOAT_FIELD(rows, "%.0f");
WRITE_INT_FIELD(width);
WRITE_BOOL_FIELD(consider_startup);
WRITE_BOOL_FIELD(consider_param_startup);
WRITE_BOOL_FIELD(consider_parallel);
WRITE_NODE_FIELD(reltargetlist);
WRITE_NODE_FIELD(pathlist);
WRITE_NODE_FIELD(ppilist);
WRITE_NODE_FIELD(cheapest_startup_path);
WRITE_NODE_FIELD(cheapest_total_path);
WRITE_NODE_FIELD(cheapest_unique_path);
WRITE_NODE_FIELD(cheapest_parameterized_paths);
WRITE_BITMAPSET_FIELD(direct_lateral_relids);
WRITE_BITMAPSET_FIELD(lateral_relids);
WRITE_UINT_FIELD(relid);
WRITE_OID_FIELD(reltablespace);
WRITE_ENUM_FIELD(rtekind, RTEKind);
WRITE_INT_FIELD(min_attr);
WRITE_INT_FIELD(max_attr);
WRITE_NODE_FIELD(lateral_vars);
WRITE_BITMAPSET_FIELD(lateral_referencers);
WRITE_NODE_FIELD(indexlist);
WRITE_UINT_FIELD(pages);
WRITE_FLOAT_FIELD(tuples, "%.0f");
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
WRITE_NODE_FIELD(subplan);
WRITE_NODE_FIELD(subroot);
WRITE_NODE_FIELD(subplan_params);
WRITE_OID_FIELD(serverid);
/* we don't try to print fdwroutine or fdw_private */
WRITE_NODE_FIELD(baserestrictinfo);
WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins);
}
static void
_outIndexOptInfo(StringInfo str, const IndexOptInfo *node)
{
WRITE_NODE_TYPE("INDEXOPTINFO");
/* NB: this isn't a complete set of fields */
WRITE_OID_FIELD(indexoid);
/* Do NOT print rel field, else infinite recursion */
WRITE_UINT_FIELD(pages);
WRITE_FLOAT_FIELD(tuples, "%.0f");
WRITE_INT_FIELD(tree_height);
WRITE_INT_FIELD(ncolumns);
/* array fields aren't really worth the trouble to print */
WRITE_OID_FIELD(relam);
/* indexprs is redundant since we print indextlist */
WRITE_NODE_FIELD(indpred);
WRITE_NODE_FIELD(indextlist);
WRITE_BOOL_FIELD(predOK);
WRITE_BOOL_FIELD(unique);
WRITE_BOOL_FIELD(immediate);
WRITE_BOOL_FIELD(hypothetical);
/* we don't bother with fields copied from the pg_am entry */
}
static void
_outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
{
/*
* To simplify reading, we just chase up to the topmost merged EC and
* print that, without bothering to show the merge-ees separately.
*/
while (node->ec_merged)
node = node->ec_merged;
WRITE_NODE_TYPE("EQUIVALENCECLASS");
WRITE_NODE_FIELD(ec_opfamilies);
WRITE_OID_FIELD(ec_collation);
WRITE_NODE_FIELD(ec_members);
WRITE_NODE_FIELD(ec_sources);
WRITE_NODE_FIELD(ec_derives);
WRITE_BITMAPSET_FIELD(ec_relids);
WRITE_BOOL_FIELD(ec_has_const);
WRITE_BOOL_FIELD(ec_has_volatile);
WRITE_BOOL_FIELD(ec_below_outer_join);
WRITE_BOOL_FIELD(ec_broken);
WRITE_UINT_FIELD(ec_sortref);
}
static void
_outEquivalenceMember(StringInfo str, const EquivalenceMember *node)
{
WRITE_NODE_TYPE("EQUIVALENCEMEMBER");
WRITE_NODE_FIELD(em_expr);
WRITE_BITMAPSET_FIELD(em_relids);
WRITE_BITMAPSET_FIELD(em_nullable_relids);
WRITE_BOOL_FIELD(em_is_const);
WRITE_BOOL_FIELD(em_is_child);
WRITE_OID_FIELD(em_datatype);
}
static void
_outPathKey(StringInfo str, const PathKey *node)
{
WRITE_NODE_TYPE("PATHKEY");
WRITE_NODE_FIELD(pk_eclass);
WRITE_OID_FIELD(pk_opfamily);
WRITE_INT_FIELD(pk_strategy);
WRITE_BOOL_FIELD(pk_nulls_first);
}
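/*
 * For orientation: _outNode wraps each of these routines' output in braces,
 * so a PathKey, for example, comes out roughly as (values illustrative only):
 *	 {PATHKEY :pk_eclass {EQUIVALENCECLASS ...} :pk_opfamily 123
 *	  :pk_strategy 1 :pk_nulls_first false}
 */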
static void
_outParamPathInfo(StringInfo str, const ParamPathInfo *node)
{
WRITE_NODE_TYPE("PARAMPATHINFO");
WRITE_BITMAPSET_FIELD(ppi_req_outer);
WRITE_FLOAT_FIELD(ppi_rows, "%.0f");
WRITE_NODE_FIELD(ppi_clauses);
}
static void
_outRestrictInfo(StringInfo str, const RestrictInfo *node)
{
WRITE_NODE_TYPE("RESTRICTINFO");
/* NB: this isn't a complete set of fields */
WRITE_NODE_FIELD(clause);
WRITE_BOOL_FIELD(is_pushed_down);
WRITE_BOOL_FIELD(outerjoin_delayed);
WRITE_BOOL_FIELD(can_join);
WRITE_BOOL_FIELD(pseudoconstant);
WRITE_BITMAPSET_FIELD(clause_relids);
WRITE_BITMAPSET_FIELD(required_relids);
WRITE_BITMAPSET_FIELD(outer_relids);
WRITE_BITMAPSET_FIELD(nullable_relids);
WRITE_BITMAPSET_FIELD(left_relids);
WRITE_BITMAPSET_FIELD(right_relids);
WRITE_NODE_FIELD(orclause);
/* don't write parent_ec, leads to infinite recursion in plan tree dump */
WRITE_FLOAT_FIELD(norm_selec, "%.4f");
WRITE_FLOAT_FIELD(outer_selec, "%.4f");
WRITE_NODE_FIELD(mergeopfamilies);
/* don't write left_ec, leads to infinite recursion in plan tree dump */
/* don't write right_ec, leads to infinite recursion in plan tree dump */
WRITE_NODE_FIELD(left_em);
WRITE_NODE_FIELD(right_em);
WRITE_BOOL_FIELD(outer_is_left);
WRITE_OID_FIELD(hashjoinoperator);
}
static void
_outPlaceHolderVar(StringInfo str, const PlaceHolderVar *node)
{
WRITE_NODE_TYPE("PLACEHOLDERVAR");
WRITE_NODE_FIELD(phexpr);
WRITE_BITMAPSET_FIELD(phrels);
WRITE_UINT_FIELD(phid);
WRITE_UINT_FIELD(phlevelsup);
}
static void
_outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node)
{
WRITE_NODE_TYPE("SPECIALJOININFO");
WRITE_BITMAPSET_FIELD(min_lefthand);
WRITE_BITMAPSET_FIELD(min_righthand);
WRITE_BITMAPSET_FIELD(syn_lefthand);
WRITE_BITMAPSET_FIELD(syn_righthand);
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_BOOL_FIELD(lhs_strict);
WRITE_BOOL_FIELD(delay_upper_joins);
WRITE_BOOL_FIELD(semi_can_btree);
WRITE_BOOL_FIELD(semi_can_hash);
WRITE_NODE_FIELD(semi_operators);
WRITE_NODE_FIELD(semi_rhs_exprs);
}
static void
_outAppendRelInfo(StringInfo str, const AppendRelInfo *node)
{
WRITE_NODE_TYPE("APPENDRELINFO");
WRITE_UINT_FIELD(parent_relid);
WRITE_UINT_FIELD(child_relid);
WRITE_OID_FIELD(parent_reltype);
WRITE_OID_FIELD(child_reltype);
WRITE_NODE_FIELD(translated_vars);
WRITE_OID_FIELD(parent_reloid);
}
static void
_outPlaceHolderInfo(StringInfo str, const PlaceHolderInfo *node)
{
WRITE_NODE_TYPE("PLACEHOLDERINFO");
WRITE_UINT_FIELD(phid);
WRITE_NODE_FIELD(ph_var);
WRITE_BITMAPSET_FIELD(ph_eval_at);
WRITE_BITMAPSET_FIELD(ph_lateral);
WRITE_BITMAPSET_FIELD(ph_needed);
WRITE_INT_FIELD(ph_width);
}
static void
_outMinMaxAggInfo(StringInfo str, const MinMaxAggInfo *node)
{
WRITE_NODE_TYPE("MINMAXAGGINFO");
WRITE_OID_FIELD(aggfnoid);
WRITE_OID_FIELD(aggsortop);
WRITE_NODE_FIELD(target);
/* We intentionally omit subroot --- too large, not interesting enough */
WRITE_NODE_FIELD(path);
WRITE_FLOAT_FIELD(pathcost, "%.2f");
WRITE_NODE_FIELD(param);
}
static void
_outPlannerParamItem(StringInfo str, const PlannerParamItem *node)
{
WRITE_NODE_TYPE("PLANNERPARAMITEM");
WRITE_NODE_FIELD(item);
WRITE_INT_FIELD(paramId);
}
/*****************************************************************************
*
* Stuff from parsenodes.h.
*
*****************************************************************************/
/*
* print the basic stuff of all nodes that inherit from CreateStmt
*/
static void
_outCreateStmtInfo(StringInfo str, const CreateStmt *node)
{
WRITE_NODE_FIELD(relation);
WRITE_NODE_FIELD(tableElts);
WRITE_NODE_FIELD(inhRelations);
WRITE_NODE_FIELD(ofTypename);
WRITE_NODE_FIELD(constraints);
WRITE_NODE_FIELD(options);
WRITE_ENUM_FIELD(oncommit, OnCommitAction);
WRITE_STRING_FIELD(tablespacename);
WRITE_BOOL_FIELD(if_not_exists);
}
static void
_outCreateStmt(StringInfo str, const CreateStmt *node)
{
WRITE_NODE_TYPE("CREATESTMT");
_outCreateStmtInfo(str, (const CreateStmt *) node);
}
static void
_outCreateForeignTableStmt(StringInfo str, const CreateForeignTableStmt *node)
{
WRITE_NODE_TYPE("CREATEFOREIGNTABLESTMT");
_outCreateStmtInfo(str, (const CreateStmt *) node);
WRITE_STRING_FIELD(servername);
WRITE_NODE_FIELD(options);
}
static void
_outImportForeignSchemaStmt(StringInfo str, const ImportForeignSchemaStmt *node)
{
WRITE_NODE_TYPE("IMPORTFOREIGNSCHEMASTMT");
WRITE_STRING_FIELD(server_name);
WRITE_STRING_FIELD(remote_schema);
WRITE_STRING_FIELD(local_schema);
WRITE_ENUM_FIELD(list_type, ImportForeignSchemaType);
WRITE_NODE_FIELD(table_list);
WRITE_NODE_FIELD(options);
}
static void
_outIndexStmt(StringInfo str, const IndexStmt *node)
{
WRITE_NODE_TYPE("INDEXSTMT");
WRITE_STRING_FIELD(idxname);
WRITE_NODE_FIELD(relation);
WRITE_STRING_FIELD(accessMethod);
WRITE_STRING_FIELD(tableSpace);
WRITE_NODE_FIELD(indexParams);
WRITE_NODE_FIELD(options);
WRITE_NODE_FIELD(whereClause);
WRITE_NODE_FIELD(excludeOpNames);
WRITE_STRING_FIELD(idxcomment);
WRITE_OID_FIELD(indexOid);
WRITE_OID_FIELD(oldNode);
WRITE_BOOL_FIELD(unique);
WRITE_BOOL_FIELD(primary);
WRITE_BOOL_FIELD(isconstraint);
WRITE_BOOL_FIELD(deferrable);
WRITE_BOOL_FIELD(initdeferred);
WRITE_BOOL_FIELD(transformed);
WRITE_BOOL_FIELD(concurrent);
WRITE_BOOL_FIELD(if_not_exists);
}
static void
_outNotifyStmt(StringInfo str, const NotifyStmt *node)
{
WRITE_NODE_TYPE("NOTIFY");
WRITE_STRING_FIELD(conditionname);
WRITE_STRING_FIELD(payload);
}
static void
_outDeclareCursorStmt(StringInfo str, const DeclareCursorStmt *node)
{
WRITE_NODE_TYPE("DECLARECURSOR");
WRITE_STRING_FIELD(portalname);
WRITE_INT_FIELD(options);
WRITE_NODE_FIELD(query);
}
static void
_outSelectStmt(StringInfo str, const SelectStmt *node)
{
WRITE_NODE_TYPE("SELECT");
WRITE_NODE_FIELD(distinctClause);
WRITE_NODE_FIELD(intoClause);
WRITE_NODE_FIELD(targetList);
WRITE_NODE_FIELD(fromClause);
WRITE_NODE_FIELD(whereClause);
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(havingClause);
WRITE_NODE_FIELD(windowClause);
WRITE_NODE_FIELD(valuesLists);
WRITE_NODE_FIELD(sortClause);
WRITE_NODE_FIELD(limitOffset);
WRITE_NODE_FIELD(limitCount);
WRITE_NODE_FIELD(lockingClause);
WRITE_NODE_FIELD(withClause);
WRITE_ENUM_FIELD(op, SetOperation);
WRITE_BOOL_FIELD(all);
WRITE_NODE_FIELD(larg);
WRITE_NODE_FIELD(rarg);
}
static void
_outFuncCall(StringInfo str, const FuncCall *node)
{
WRITE_NODE_TYPE("FUNCCALL");
WRITE_NODE_FIELD(funcname);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(agg_order);
WRITE_NODE_FIELD(agg_filter);
WRITE_BOOL_FIELD(agg_within_group);
WRITE_BOOL_FIELD(agg_star);
WRITE_BOOL_FIELD(agg_distinct);
WRITE_BOOL_FIELD(func_variadic);
WRITE_NODE_FIELD(over);
WRITE_LOCATION_FIELD(location);
}
static void
_outDefElem(StringInfo str, const DefElem *node)
{
WRITE_NODE_TYPE("DEFELEM");
WRITE_STRING_FIELD(defnamespace);
WRITE_STRING_FIELD(defname);
WRITE_NODE_FIELD(arg);
WRITE_ENUM_FIELD(defaction, DefElemAction);
}
static void
_outTableLikeClause(StringInfo str, const TableLikeClause *node)
{
WRITE_NODE_TYPE("TABLELIKECLAUSE");
WRITE_NODE_FIELD(relation);
WRITE_UINT_FIELD(options);
}
static void
_outLockingClause(StringInfo str, const LockingClause *node)
{
WRITE_NODE_TYPE("LOCKINGCLAUSE");
WRITE_NODE_FIELD(lockedRels);
WRITE_ENUM_FIELD(strength, LockClauseStrength);
WRITE_ENUM_FIELD(waitPolicy, LockWaitPolicy);
}
static void
_outXmlSerialize(StringInfo str, const XmlSerialize *node)
{
WRITE_NODE_TYPE("XMLSERIALIZE");
WRITE_ENUM_FIELD(xmloption, XmlOptionType);
WRITE_NODE_FIELD(expr);
WRITE_NODE_FIELD(typeName);
WRITE_LOCATION_FIELD(location);
}
static void
_outColumnDef(StringInfo str, const ColumnDef *node)
{
WRITE_NODE_TYPE("COLUMNDEF");
WRITE_STRING_FIELD(colname);
WRITE_NODE_FIELD(typeName);
WRITE_INT_FIELD(inhcount);
WRITE_BOOL_FIELD(is_local);
WRITE_BOOL_FIELD(is_not_null);
WRITE_BOOL_FIELD(is_from_type);
WRITE_CHAR_FIELD(storage);
WRITE_NODE_FIELD(raw_default);
WRITE_NODE_FIELD(cooked_default);
WRITE_NODE_FIELD(collClause);
WRITE_OID_FIELD(collOid);
WRITE_NODE_FIELD(constraints);
WRITE_NODE_FIELD(fdwoptions);
WRITE_LOCATION_FIELD(location);
}
static void
_outTypeName(StringInfo str, const TypeName *node)
{
WRITE_NODE_TYPE("TYPENAME");
WRITE_NODE_FIELD(names);
WRITE_OID_FIELD(typeOid);
WRITE_BOOL_FIELD(setof);
WRITE_BOOL_FIELD(pct_type);
WRITE_NODE_FIELD(typmods);
WRITE_INT_FIELD(typemod);
WRITE_NODE_FIELD(arrayBounds);
WRITE_LOCATION_FIELD(location);
}
static void
_outTypeCast(StringInfo str, const TypeCast *node)
{
WRITE_NODE_TYPE("TYPECAST");
WRITE_NODE_FIELD(arg);
WRITE_NODE_FIELD(typeName);
WRITE_LOCATION_FIELD(location);
}
static void
_outCollateClause(StringInfo str, const CollateClause *node)
{
WRITE_NODE_TYPE("COLLATECLAUSE");
WRITE_NODE_FIELD(arg);
WRITE_NODE_FIELD(collname);
WRITE_LOCATION_FIELD(location);
}
static void
_outIndexElem(StringInfo str, const IndexElem *node)
{
WRITE_NODE_TYPE("INDEXELEM");
WRITE_STRING_FIELD(name);
WRITE_NODE_FIELD(expr);
WRITE_STRING_FIELD(indexcolname);
WRITE_NODE_FIELD(collation);
WRITE_NODE_FIELD(opclass);
WRITE_ENUM_FIELD(ordering, SortByDir);
WRITE_ENUM_FIELD(nulls_ordering, SortByNulls);
}
static void
_outQuery(StringInfo str, const Query *node)
{
WRITE_NODE_TYPE("QUERY");
WRITE_ENUM_FIELD(commandType, CmdType);
WRITE_ENUM_FIELD(querySource, QuerySource);
/* we intentionally do not print the queryId field */
WRITE_BOOL_FIELD(canSetTag);
/*
* Hack to work around missing outfuncs routines for a lot of the
* utility-statement node types. (The only one we actually *need* for
* rules support is NotifyStmt.) Someday we ought to support 'em all, but
* for the meantime do this to avoid getting lots of warnings when running
* with debug_print_parse on.
*/
if (node->utilityStmt)
{
switch (nodeTag(node->utilityStmt))
{
case T_CreateStmt:
case T_IndexStmt:
case T_NotifyStmt:
case T_DeclareCursorStmt:
WRITE_NODE_FIELD(utilityStmt);
break;
default:
appendStringInfoString(str, " :utilityStmt ?");
break;
}
}
else
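		/* a NULL utilityStmt is emitted as <>, the dump notation for a NULL pointer */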
appendStringInfoString(str, " :utilityStmt <>");
WRITE_INT_FIELD(resultRelation);
WRITE_BOOL_FIELD(hasAggs);
WRITE_BOOL_FIELD(hasWindowFuncs);
WRITE_BOOL_FIELD(hasSubLinks);
WRITE_BOOL_FIELD(hasDistinctOn);
WRITE_BOOL_FIELD(hasRecursive);
WRITE_BOOL_FIELD(hasModifyingCTE);
WRITE_BOOL_FIELD(hasForUpdate);
WRITE_BOOL_FIELD(hasRowSecurity);
WRITE_NODE_FIELD(cteList);
WRITE_NODE_FIELD(rtable);
WRITE_NODE_FIELD(jointree);
WRITE_NODE_FIELD(targetList);
WRITE_NODE_FIELD(onConflict);
WRITE_NODE_FIELD(returningList);
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(groupingSets);
WRITE_NODE_FIELD(havingQual);
WRITE_NODE_FIELD(windowClause);
WRITE_NODE_FIELD(distinctClause);
WRITE_NODE_FIELD(sortClause);
WRITE_NODE_FIELD(limitOffset);
WRITE_NODE_FIELD(limitCount);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(setOperations);
WRITE_NODE_FIELD(constraintDeps);
}
static void
_outWithCheckOption(StringInfo str, const WithCheckOption *node)
{
WRITE_NODE_TYPE("WITHCHECKOPTION");
WRITE_ENUM_FIELD(kind, WCOKind);
WRITE_STRING_FIELD(relname);
WRITE_STRING_FIELD(polname);
WRITE_NODE_FIELD(qual);
WRITE_BOOL_FIELD(cascaded);
}
static void
_outSortGroupClause(StringInfo str, const SortGroupClause *node)
{
WRITE_NODE_TYPE("SORTGROUPCLAUSE");
WRITE_UINT_FIELD(tleSortGroupRef);
WRITE_OID_FIELD(eqop);
WRITE_OID_FIELD(sortop);
WRITE_BOOL_FIELD(nulls_first);
WRITE_BOOL_FIELD(hashable);
}
static void
_outGroupingSet(StringInfo str, const GroupingSet *node)
{
WRITE_NODE_TYPE("GROUPINGSET");
WRITE_ENUM_FIELD(kind, GroupingSetKind);
WRITE_NODE_FIELD(content);
WRITE_LOCATION_FIELD(location);
}
static void
_outWindowClause(StringInfo str, const WindowClause *node)
{
WRITE_NODE_TYPE("WINDOWCLAUSE");
WRITE_STRING_FIELD(name);
WRITE_STRING_FIELD(refname);
WRITE_NODE_FIELD(partitionClause);
WRITE_NODE_FIELD(orderClause);
WRITE_INT_FIELD(frameOptions);
WRITE_NODE_FIELD(startOffset);
WRITE_NODE_FIELD(endOffset);
WRITE_UINT_FIELD(winref);
WRITE_BOOL_FIELD(copiedOrder);
}
static void
_outRowMarkClause(StringInfo str, const RowMarkClause *node)
{
WRITE_NODE_TYPE("ROWMARKCLAUSE");
WRITE_UINT_FIELD(rti);
WRITE_ENUM_FIELD(strength, LockClauseStrength);
WRITE_ENUM_FIELD(waitPolicy, LockWaitPolicy);
WRITE_BOOL_FIELD(pushedDown);
}
static void
_outWithClause(StringInfo str, const WithClause *node)
{
WRITE_NODE_TYPE("WITHCLAUSE");
WRITE_NODE_FIELD(ctes);
WRITE_BOOL_FIELD(recursive);
WRITE_LOCATION_FIELD(location);
}
static void
_outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
{
WRITE_NODE_TYPE("COMMONTABLEEXPR");
WRITE_STRING_FIELD(ctename);
WRITE_NODE_FIELD(aliascolnames);
WRITE_NODE_FIELD(ctequery);
WRITE_LOCATION_FIELD(location);
WRITE_BOOL_FIELD(cterecursive);
WRITE_INT_FIELD(cterefcount);
WRITE_NODE_FIELD(ctecolnames);
WRITE_NODE_FIELD(ctecoltypes);
WRITE_NODE_FIELD(ctecoltypmods);
WRITE_NODE_FIELD(ctecolcollations);
}
static void
_outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
{
WRITE_NODE_TYPE("SETOPERATIONSTMT");
WRITE_ENUM_FIELD(op, SetOperation);
WRITE_BOOL_FIELD(all);
WRITE_NODE_FIELD(larg);
WRITE_NODE_FIELD(rarg);
WRITE_NODE_FIELD(colTypes);
WRITE_NODE_FIELD(colTypmods);
WRITE_NODE_FIELD(colCollations);
WRITE_NODE_FIELD(groupClauses);
}
static void
_outRangeTblEntry(StringInfo str, const RangeTblEntry *node)
{
WRITE_NODE_TYPE("RTE");
/* put alias + eref first to make dump more legible */
WRITE_NODE_FIELD(alias);
WRITE_NODE_FIELD(eref);
WRITE_ENUM_FIELD(rtekind, RTEKind);
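	/* print the fields specific to this RTE kind, then the common trailing fields */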
switch (node->rtekind)
{
case RTE_RELATION:
WRITE_OID_FIELD(relid);
WRITE_CHAR_FIELD(relkind);
WRITE_NODE_FIELD(tablesample);
break;
case RTE_SUBQUERY:
WRITE_NODE_FIELD(subquery);
WRITE_BOOL_FIELD(security_barrier);
break;
case RTE_JOIN:
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_NODE_FIELD(joinaliasvars);
break;
case RTE_FUNCTION:
WRITE_NODE_FIELD(functions);
WRITE_BOOL_FIELD(funcordinality);
break;
case RTE_VALUES:
WRITE_NODE_FIELD(values_lists);
WRITE_NODE_FIELD(values_collations);
break;
case RTE_CTE:
WRITE_STRING_FIELD(ctename);
WRITE_UINT_FIELD(ctelevelsup);
WRITE_BOOL_FIELD(self_reference);
WRITE_NODE_FIELD(ctecoltypes);
WRITE_NODE_FIELD(ctecoltypmods);
WRITE_NODE_FIELD(ctecolcollations);
break;
default:
elog(ERROR, "unrecognized RTE kind: %d", (int) node->rtekind);
break;
}
WRITE_BOOL_FIELD(lateral);
WRITE_BOOL_FIELD(inh);
WRITE_BOOL_FIELD(inFromCl);
WRITE_UINT_FIELD(requiredPerms);
WRITE_OID_FIELD(checkAsUser);
WRITE_BITMAPSET_FIELD(selectedCols);
WRITE_BITMAPSET_FIELD(insertedCols);
WRITE_BITMAPSET_FIELD(updatedCols);
WRITE_NODE_FIELD(securityQuals);
}
static void
_outRangeTblFunction(StringInfo str, const RangeTblFunction *node)
{
WRITE_NODE_TYPE("RANGETBLFUNCTION");
WRITE_NODE_FIELD(funcexpr);
WRITE_INT_FIELD(funccolcount);
WRITE_NODE_FIELD(funccolnames);
WRITE_NODE_FIELD(funccoltypes);
WRITE_NODE_FIELD(funccoltypmods);
WRITE_NODE_FIELD(funccolcollations);
WRITE_BITMAPSET_FIELD(funcparams);
}
static void
_outTableSampleClause(StringInfo str, const TableSampleClause *node)
{
WRITE_NODE_TYPE("TABLESAMPLECLAUSE");
WRITE_OID_FIELD(tsmhandler);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(repeatable);
}
static void
_outAExpr(StringInfo str, const A_Expr *node)
{
WRITE_NODE_TYPE("AEXPR");
switch (node->kind)
{
case AEXPR_OP:
appendStringInfoChar(str, ' ');
WRITE_NODE_FIELD(name);
break;
case AEXPR_OP_ANY:
appendStringInfoChar(str, ' ');
WRITE_NODE_FIELD(name);
appendStringInfoString(str, " ANY ");
break;
case AEXPR_OP_ALL:
appendStringInfoChar(str, ' ');
WRITE_NODE_FIELD(name);
appendStringInfoString(str, " ALL ");
break;
case AEXPR_DISTINCT:
appendStringInfoString(str, " DISTINCT ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_NULLIF:
appendStringInfoString(str, " NULLIF ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_OF:
appendStringInfoString(str, " OF ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_IN:
appendStringInfoString(str, " IN ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_LIKE:
appendStringInfoString(str, " LIKE ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_ILIKE:
appendStringInfoString(str, " ILIKE ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_SIMILAR:
appendStringInfoString(str, " SIMILAR ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_BETWEEN:
appendStringInfoString(str, " BETWEEN ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_NOT_BETWEEN:
appendStringInfoString(str, " NOT_BETWEEN ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_BETWEEN_SYM:
appendStringInfoString(str, " BETWEEN_SYM ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_NOT_BETWEEN_SYM:
appendStringInfoString(str, " NOT_BETWEEN_SYM ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_PAREN:
appendStringInfoString(str, " PAREN");
break;
default:
appendStringInfoString(str, " ??");
break;
}
WRITE_NODE_FIELD(lexpr);
WRITE_NODE_FIELD(rexpr);
WRITE_LOCATION_FIELD(location);
}
static void
_outValue(StringInfo str, const Value *value)
{
switch (value->type)
{
case T_Integer:
appendStringInfo(str, "%ld", value->val.ival);
break;
case T_Float:
/*
* We assume the value is a valid numeric literal and so does not
* need quoting.
*/
appendStringInfoString(str, value->val.str);
break;
case T_String:
/*
* We use _outToken to provide escaping of the string's content,
* but we don't want it to do anything with an empty string.
*/
appendStringInfoChar(str, '"');
if (value->val.str[0] != '\0')
_outToken(str, value->val.str);
appendStringInfoChar(str, '"');
break;
case T_BitString:
/* internal representation already has leading 'b' */
appendStringInfoString(str, value->val.str);
break;
case T_Null:
/* this is seen only within A_Const, not in transformed trees */
appendStringInfoString(str, "NULL");
break;
default:
elog(ERROR, "unrecognized node type: %d", (int) value->type);
break;
}
}
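/*
 * Illustrative examples of what the cases above emit (not exhaustive, and
 * derived only from the code in _outValue): an Integer 42 comes out as
 * "42", a Float keeps its literal text such as "3.14", a String foo comes
 * out double-quoted as "foo" (with _outToken escaping any special
 * characters in the content), a BitString keeps its stored "b..." text,
 * and a Null comes out as the bare word NULL.
 */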
static void
_outColumnRef(StringInfo str, const ColumnRef *node)
{
WRITE_NODE_TYPE("COLUMNREF");
WRITE_NODE_FIELD(fields);
WRITE_LOCATION_FIELD(location);
}
static void
_outParamRef(StringInfo str, const ParamRef *node)
{
WRITE_NODE_TYPE("PARAMREF");
WRITE_INT_FIELD(number);
WRITE_LOCATION_FIELD(location);
}
static void
_outAConst(StringInfo str, const A_Const *node)
{
WRITE_NODE_TYPE("A_CONST");
appendStringInfoString(str, " :val ");
_outValue(str, &(node->val));
WRITE_LOCATION_FIELD(location);
}
static void
_outA_Star(StringInfo str, const A_Star *node)
{
WRITE_NODE_TYPE("A_STAR");
}
static void
_outA_Indices(StringInfo str, const A_Indices *node)
{
WRITE_NODE_TYPE("A_INDICES");
WRITE_BOOL_FIELD(is_slice);
WRITE_NODE_FIELD(lidx);
WRITE_NODE_FIELD(uidx);
}
static void
_outA_Indirection(StringInfo str, const A_Indirection *node)
{
WRITE_NODE_TYPE("A_INDIRECTION");
WRITE_NODE_FIELD(arg);
WRITE_NODE_FIELD(indirection);
}
static void
_outA_ArrayExpr(StringInfo str, const A_ArrayExpr *node)
{
WRITE_NODE_TYPE("A_ARRAYEXPR");
WRITE_NODE_FIELD(elements);
WRITE_LOCATION_FIELD(location);
}
static void
_outResTarget(StringInfo str, const ResTarget *node)
{
WRITE_NODE_TYPE("RESTARGET");
WRITE_STRING_FIELD(name);
WRITE_NODE_FIELD(indirection);
WRITE_NODE_FIELD(val);
WRITE_LOCATION_FIELD(location);
}
static void
_outMultiAssignRef(StringInfo str, const MultiAssignRef *node)
{
WRITE_NODE_TYPE("MULTIASSIGNREF");
WRITE_NODE_FIELD(source);
WRITE_INT_FIELD(colno);
WRITE_INT_FIELD(ncolumns);
}
static void
_outSortBy(StringInfo str, const SortBy *node)
{
WRITE_NODE_TYPE("SORTBY");
WRITE_NODE_FIELD(node);
WRITE_ENUM_FIELD(sortby_dir, SortByDir);
WRITE_ENUM_FIELD(sortby_nulls, SortByNulls);
WRITE_NODE_FIELD(useOp);
WRITE_LOCATION_FIELD(location);
}
static void
_outWindowDef(StringInfo str, const WindowDef *node)
{
WRITE_NODE_TYPE("WINDOWDEF");
WRITE_STRING_FIELD(name);
WRITE_STRING_FIELD(refname);
WRITE_NODE_FIELD(partitionClause);
WRITE_NODE_FIELD(orderClause);
WRITE_INT_FIELD(frameOptions);
WRITE_NODE_FIELD(startOffset);
WRITE_NODE_FIELD(endOffset);
WRITE_LOCATION_FIELD(location);
}
static void
_outRangeSubselect(StringInfo str, const RangeSubselect *node)
{
WRITE_NODE_TYPE("RANGESUBSELECT");
WRITE_BOOL_FIELD(lateral);
WRITE_NODE_FIELD(subquery);
WRITE_NODE_FIELD(alias);
}
static void
_outRangeFunction(StringInfo str, const RangeFunction *node)
{
WRITE_NODE_TYPE("RANGEFUNCTION");
WRITE_BOOL_FIELD(lateral);
WRITE_BOOL_FIELD(ordinality);
WRITE_BOOL_FIELD(is_rowsfrom);
WRITE_NODE_FIELD(functions);
WRITE_NODE_FIELD(alias);
WRITE_NODE_FIELD(coldeflist);
}
static void
_outRangeTableSample(StringInfo str, const RangeTableSample *node)
{
WRITE_NODE_TYPE("RANGETABLESAMPLE");
WRITE_NODE_FIELD(relation);
WRITE_NODE_FIELD(method);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(repeatable);
WRITE_LOCATION_FIELD(location);
}
static void
_outConstraint(StringInfo str, const Constraint *node)
{
WRITE_NODE_TYPE("CONSTRAINT");
WRITE_STRING_FIELD(conname);
WRITE_BOOL_FIELD(deferrable);
WRITE_BOOL_FIELD(initdeferred);
WRITE_LOCATION_FIELD(location);
appendStringInfoString(str, " :contype ");
switch (node->contype)
{
case CONSTR_NULL:
appendStringInfoString(str, "NULL");
break;
case CONSTR_NOTNULL:
appendStringInfoString(str, "NOT_NULL");
break;
case CONSTR_DEFAULT:
appendStringInfoString(str, "DEFAULT");
WRITE_NODE_FIELD(raw_expr);
WRITE_STRING_FIELD(cooked_expr);
break;
case CONSTR_CHECK:
appendStringInfoString(str, "CHECK");
WRITE_BOOL_FIELD(is_no_inherit);
WRITE_NODE_FIELD(raw_expr);
WRITE_STRING_FIELD(cooked_expr);
break;
case CONSTR_PRIMARY:
appendStringInfoString(str, "PRIMARY_KEY");
WRITE_NODE_FIELD(keys);
WRITE_NODE_FIELD(options);
WRITE_STRING_FIELD(indexname);
WRITE_STRING_FIELD(indexspace);
/* access_method and where_clause not currently used */
break;
case CONSTR_UNIQUE:
appendStringInfoString(str, "UNIQUE");
WRITE_NODE_FIELD(keys);
WRITE_NODE_FIELD(options);
WRITE_STRING_FIELD(indexname);
WRITE_STRING_FIELD(indexspace);
/* access_method and where_clause not currently used */
break;
case CONSTR_EXCLUSION:
appendStringInfoString(str, "EXCLUSION");
WRITE_NODE_FIELD(exclusions);
WRITE_NODE_FIELD(options);
WRITE_STRING_FIELD(indexname);
WRITE_STRING_FIELD(indexspace);
WRITE_STRING_FIELD(access_method);
WRITE_NODE_FIELD(where_clause);
break;
case CONSTR_FOREIGN:
appendStringInfoString(str, "FOREIGN_KEY");
WRITE_NODE_FIELD(pktable);
WRITE_NODE_FIELD(fk_attrs);
WRITE_NODE_FIELD(pk_attrs);
WRITE_CHAR_FIELD(fk_matchtype);
WRITE_CHAR_FIELD(fk_upd_action);
WRITE_CHAR_FIELD(fk_del_action);
WRITE_NODE_FIELD(old_conpfeqop);
WRITE_OID_FIELD(old_pktable_oid);
WRITE_BOOL_FIELD(skip_validation);
WRITE_BOOL_FIELD(initially_valid);
break;
case CONSTR_ATTR_DEFERRABLE:
appendStringInfoString(str, "ATTR_DEFERRABLE");
break;
case CONSTR_ATTR_NOT_DEFERRABLE:
appendStringInfoString(str, "ATTR_NOT_DEFERRABLE");
break;
case CONSTR_ATTR_DEFERRED:
appendStringInfoString(str, "ATTR_DEFERRED");
break;
case CONSTR_ATTR_IMMEDIATE:
appendStringInfoString(str, "ATTR_IMMEDIATE");
break;
default:
appendStringInfo(str, "<unrecognized_constraint %d>",
(int) node->contype);
break;
}
}
/*
 * _outNode -
 *	  converts a Node into an ascii string and appends it to 'str'
 */
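/*
 * Illustrative summary of the output shapes produced here (not a normative
 * grammar): a NULL pointer is emitted as "<>", List/IntList/OidList nodes
 * are handled by _outList, Value-class nodes (Integer, Float, String,
 * BitString) are emitted bare via _outValue, and every other node type is
 * emitted as "{NODELABEL ...}" by its type-specific _out routine.
 */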
static void
_outNode(StringInfo str, const void *obj)
{
if (obj == NULL)
appendStringInfoString(str, "<>");
	else if (IsA(obj, List) || IsA(obj, IntList) || IsA(obj, OidList))
_outList(str, obj);
else if (IsA(obj, Integer) ||
IsA(obj, Float) ||
IsA(obj, String) ||
IsA(obj, BitString))
{
/* nodeRead does not want to see { } around these! */
_outValue(str, obj);
}
else
{
appendStringInfoChar(str, '{');
switch (nodeTag(obj))
{
case T_PlannedStmt:
_outPlannedStmt(str, obj);
break;
case T_Plan:
_outPlan(str, obj);
break;
case T_Result:
_outResult(str, obj);
break;
case T_ModifyTable:
_outModifyTable(str, obj);
break;
case T_Append:
_outAppend(str, obj);
break;
case T_MergeAppend:
_outMergeAppend(str, obj);
break;
case T_RecursiveUnion:
_outRecursiveUnion(str, obj);
break;
case T_BitmapAnd:
_outBitmapAnd(str, obj);
break;
case T_BitmapOr:
_outBitmapOr(str, obj);
break;
case T_Gather:
_outGather(str, obj);
break;
case T_Scan:
_outScan(str, obj);
break;
case T_SeqScan:
_outSeqScan(str, obj);
break;
case T_SampleScan:
_outSampleScan(str, obj);
break;
case T_IndexScan:
_outIndexScan(str, obj);
break;
case T_IndexOnlyScan:
_outIndexOnlyScan(str, obj);
break;
case T_BitmapIndexScan:
_outBitmapIndexScan(str, obj);
break;
case T_BitmapHeapScan:
_outBitmapHeapScan(str, obj);
break;
case T_TidScan:
_outTidScan(str, obj);
break;
case T_SubqueryScan:
_outSubqueryScan(str, obj);
break;
case T_FunctionScan:
_outFunctionScan(str, obj);
break;
case T_ValuesScan:
_outValuesScan(str, obj);
break;
case T_CteScan:
_outCteScan(str, obj);
break;
case T_WorkTableScan:
_outWorkTableScan(str, obj);
break;
case T_ForeignScan:
_outForeignScan(str, obj);
break;
case T_CustomScan:
_outCustomScan(str, obj);
break;
case T_Join:
_outJoin(str, obj);
break;
case T_NestLoop:
_outNestLoop(str, obj);
break;
case T_MergeJoin:
_outMergeJoin(str, obj);
break;
case T_HashJoin:
_outHashJoin(str, obj);
break;
case T_Agg:
_outAgg(str, obj);
break;
case T_WindowAgg:
_outWindowAgg(str, obj);
break;
case T_Group:
_outGroup(str, obj);
break;
case T_Material:
_outMaterial(str, obj);
break;
case T_Sort:
_outSort(str, obj);
break;
case T_Unique:
_outUnique(str, obj);
break;
case T_Hash:
_outHash(str, obj);
break;
case T_SetOp:
_outSetOp(str, obj);
break;
case T_LockRows:
_outLockRows(str, obj);
break;
case T_Limit:
_outLimit(str, obj);
break;
case T_NestLoopParam:
_outNestLoopParam(str, obj);
break;
case T_PlanRowMark:
_outPlanRowMark(str, obj);
break;
case T_PlanInvalItem:
_outPlanInvalItem(str, obj);
break;
case T_Alias:
_outAlias(str, obj);
break;
case T_RangeVar:
_outRangeVar(str, obj);
break;
case T_IntoClause:
_outIntoClause(str, obj);
break;
case T_Var:
_outVar(str, obj);
break;
case T_Const:
_outConst(str, obj);
break;
case T_Param:
_outParam(str, obj);
break;
case T_Aggref:
_outAggref(str, obj);
break;
case T_GroupingFunc:
_outGroupingFunc(str, obj);
break;
case T_WindowFunc:
_outWindowFunc(str, obj);
break;
case T_ArrayRef:
_outArrayRef(str, obj);
break;
case T_FuncExpr:
_outFuncExpr(str, obj);
break;
case T_NamedArgExpr:
_outNamedArgExpr(str, obj);
break;
case T_OpExpr:
_outOpExpr(str, obj);
break;
case T_DistinctExpr:
_outDistinctExpr(str, obj);
break;
case T_NullIfExpr:
_outNullIfExpr(str, obj);
break;
case T_ScalarArrayOpExpr:
_outScalarArrayOpExpr(str, obj);
break;
case T_BoolExpr:
_outBoolExpr(str, obj);
break;
case T_SubLink:
_outSubLink(str, obj);
break;
case T_SubPlan:
_outSubPlan(str, obj);
break;
case T_AlternativeSubPlan:
_outAlternativeSubPlan(str, obj);
break;
case T_FieldSelect:
_outFieldSelect(str, obj);
break;
case T_FieldStore:
_outFieldStore(str, obj);
break;
case T_RelabelType:
_outRelabelType(str, obj);
break;
case T_CoerceViaIO:
_outCoerceViaIO(str, obj);
break;
case T_ArrayCoerceExpr:
_outArrayCoerceExpr(str, obj);
break;
case T_ConvertRowtypeExpr:
_outConvertRowtypeExpr(str, obj);
break;
case T_CollateExpr:
_outCollateExpr(str, obj);
break;
case T_CaseExpr:
_outCaseExpr(str, obj);
break;
case T_CaseWhen:
_outCaseWhen(str, obj);
break;
case T_CaseTestExpr:
_outCaseTestExpr(str, obj);
break;
case T_ArrayExpr:
_outArrayExpr(str, obj);
break;
case T_RowExpr:
_outRowExpr(str, obj);
break;
case T_RowCompareExpr:
_outRowCompareExpr(str, obj);
break;
case T_CoalesceExpr:
_outCoalesceExpr(str, obj);
break;
case T_MinMaxExpr:
_outMinMaxExpr(str, obj);
break;
case T_XmlExpr:
_outXmlExpr(str, obj);
break;
case T_NullTest:
_outNullTest(str, obj);
break;
case T_BooleanTest:
_outBooleanTest(str, obj);
break;
case T_CoerceToDomain:
_outCoerceToDomain(str, obj);
break;
case T_CoerceToDomainValue:
_outCoerceToDomainValue(str, obj);
break;
case T_SetToDefault:
_outSetToDefault(str, obj);
break;
case T_CurrentOfExpr:
_outCurrentOfExpr(str, obj);
break;
case T_InferenceElem:
_outInferenceElem(str, obj);
break;
case T_TargetEntry:
_outTargetEntry(str, obj);
break;
case T_RangeTblRef:
_outRangeTblRef(str, obj);
break;
case T_JoinExpr:
_outJoinExpr(str, obj);
break;
case T_FromExpr:
_outFromExpr(str, obj);
break;
case T_OnConflictExpr:
_outOnConflictExpr(str, obj);
break;
case T_Path:
_outPath(str, obj);
break;
case T_IndexPath:
_outIndexPath(str, obj);
break;
case T_BitmapHeapPath:
_outBitmapHeapPath(str, obj);
break;
case T_BitmapAndPath:
_outBitmapAndPath(str, obj);
break;
case T_BitmapOrPath:
_outBitmapOrPath(str, obj);
break;
case T_TidPath:
_outTidPath(str, obj);
break;
case T_ForeignPath:
_outForeignPath(str, obj);
break;
case T_CustomPath:
_outCustomPath(str, obj);
break;
case T_AppendPath:
_outAppendPath(str, obj);
break;
case T_MergeAppendPath:
_outMergeAppendPath(str, obj);
break;
case T_ResultPath:
_outResultPath(str, obj);
break;
case T_MaterialPath:
_outMaterialPath(str, obj);
break;
case T_UniquePath:
_outUniquePath(str, obj);
break;
case T_GatherPath:
_outGatherPath(str, obj);
break;
case T_NestPath:
_outNestPath(str, obj);
break;
case T_MergePath:
_outMergePath(str, obj);
break;
case T_HashPath:
_outHashPath(str, obj);
break;
case T_PlannerGlobal:
_outPlannerGlobal(str, obj);
break;
case T_PlannerInfo:
_outPlannerInfo(str, obj);
break;
case T_RelOptInfo:
_outRelOptInfo(str, obj);
break;
case T_IndexOptInfo:
_outIndexOptInfo(str, obj);
break;
case T_EquivalenceClass:
_outEquivalenceClass(str, obj);
break;
case T_EquivalenceMember:
_outEquivalenceMember(str, obj);
break;
case T_PathKey:
_outPathKey(str, obj);
break;
case T_ParamPathInfo:
_outParamPathInfo(str, obj);
break;
case T_RestrictInfo:
_outRestrictInfo(str, obj);
break;
case T_PlaceHolderVar:
_outPlaceHolderVar(str, obj);
break;
case T_SpecialJoinInfo:
_outSpecialJoinInfo(str, obj);
break;
case T_AppendRelInfo:
_outAppendRelInfo(str, obj);
break;
case T_PlaceHolderInfo:
_outPlaceHolderInfo(str, obj);
break;
case T_MinMaxAggInfo:
_outMinMaxAggInfo(str, obj);
break;
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
case T_CreateStmt:
_outCreateStmt(str, obj);
break;
case T_CreateForeignTableStmt:
_outCreateForeignTableStmt(str, obj);
break;
case T_ImportForeignSchemaStmt:
_outImportForeignSchemaStmt(str, obj);
break;
case T_IndexStmt:
_outIndexStmt(str, obj);
break;
case T_NotifyStmt:
_outNotifyStmt(str, obj);
break;
case T_DeclareCursorStmt:
_outDeclareCursorStmt(str, obj);
break;
case T_SelectStmt:
_outSelectStmt(str, obj);
break;
case T_ColumnDef:
_outColumnDef(str, obj);
break;
case T_TypeName:
_outTypeName(str, obj);
break;
case T_TypeCast:
_outTypeCast(str, obj);
break;
case T_CollateClause:
_outCollateClause(str, obj);
break;
case T_IndexElem:
_outIndexElem(str, obj);
break;
case T_Query:
_outQuery(str, obj);
break;
case T_WithCheckOption:
_outWithCheckOption(str, obj);
break;
case T_SortGroupClause:
_outSortGroupClause(str, obj);
break;
case T_GroupingSet:
_outGroupingSet(str, obj);
break;
case T_WindowClause:
_outWindowClause(str, obj);
break;
case T_RowMarkClause:
_outRowMarkClause(str, obj);
break;
case T_WithClause:
_outWithClause(str, obj);
break;
case T_CommonTableExpr:
_outCommonTableExpr(str, obj);
break;
case T_SetOperationStmt:
_outSetOperationStmt(str, obj);
break;
case T_RangeTblEntry:
_outRangeTblEntry(str, obj);
break;
case T_RangeTblFunction:
_outRangeTblFunction(str, obj);
break;
case T_TableSampleClause:
_outTableSampleClause(str, obj);
break;
case T_A_Expr:
_outAExpr(str, obj);
break;
case T_ColumnRef:
_outColumnRef(str, obj);
break;
case T_ParamRef:
_outParamRef(str, obj);
break;
case T_A_Const:
_outAConst(str, obj);
break;
case T_A_Star:
_outA_Star(str, obj);
break;
case T_A_Indices:
_outA_Indices(str, obj);
break;
case T_A_Indirection:
_outA_Indirection(str, obj);
break;
case T_A_ArrayExpr:
_outA_ArrayExpr(str, obj);
break;
case T_ResTarget:
_outResTarget(str, obj);
break;
case T_MultiAssignRef:
_outMultiAssignRef(str, obj);
break;
case T_SortBy:
_outSortBy(str, obj);
break;
case T_WindowDef:
_outWindowDef(str, obj);
break;
case T_RangeSubselect:
_outRangeSubselect(str, obj);
break;
case T_RangeFunction:
_outRangeFunction(str, obj);
break;
case T_RangeTableSample:
_outRangeTableSample(str, obj);
break;
case T_Constraint:
_outConstraint(str, obj);
break;
case T_FuncCall:
_outFuncCall(str, obj);
break;
case T_DefElem:
_outDefElem(str, obj);
break;
case T_TableLikeClause:
_outTableLikeClause(str, obj);
break;
case T_LockingClause:
_outLockingClause(str, obj);
break;
case T_XmlSerialize:
_outXmlSerialize(str, obj);
break;
default:
/*
* This should be an ERROR, but it's too useful to be able to
* dump structures that _outNode only understands part of.
*/
elog(WARNING, "could not dump unrecognized node type: %d",
(int) nodeTag(obj));
break;
}
appendStringInfoChar(str, '}');
}
}
/*
* nodeToString -
* returns the ascii representation of the Node as a palloc'd string
*/
char *
nodeToString(const void *obj)
{
StringInfoData str;
/* see stringinfo.h for an explanation of this maneuver */
initStringInfo(&str);
_outNode(&str, obj);
return str.data;
}
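/*
 * Illustrative usage (a sketch, not part of the module's API surface): a
 * caller debugging the planner might dump a tree like this, where "plan"
 * and the log message are placeholders.
 *
 *		char	   *s = nodeToString(plan);
 *
 *		elog(LOG, "plan tree: %s", s);
 *		pfree(s);
 *
 * Because the result is palloc'd in the current memory context, it can be
 * pfree'd explicitly or simply left for the context to reclaim.
 */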